library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.2.2Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.3      ✔ forcats 0.5.2 Warning: package ‘ggplot2’ was built under R version 4.2.2── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(lubridate)

Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(caret)
Warning: package ‘caret’ was built under R version 4.2.2Loading required package: lattice
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift
library(broom)
Warning: package ‘broom’ was built under R version 4.2.2
library(fastDummies)
Warning: package ‘fastDummies’ was built under R version 4.2.2
library(GGally)
Warning: package ‘GGally’ was built under R version 4.2.2Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(ggfortify)
Warning: package ‘ggfortify’ was built under R version 4.2.2
library(mosaic)
Warning: package ‘mosaic’ was built under R version 4.2.2Registered S3 method overwritten by 'mosaic':
  method                           from   
  fortify.SpatialPolygonsDataFrame ggplot2

The 'mosaic' package masks several functions from core packages in order to add 
additional features.  The original behavior of these functions should not be affected by this.

Attaching package: ‘mosaic’

The following object is masked from ‘package:Matrix’:

    mean

The following object is masked from ‘package:caret’:

    dotPlot

The following objects are masked from ‘package:dplyr’:

    count, do, tally

The following object is masked from ‘package:purrr’:

    cross

The following object is masked from ‘package:ggplot2’:

    stat

The following objects are masked from ‘package:stats’:

    binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test, quantile, sd,
    t.test, var

The following objects are masked from ‘package:base’:

    max, mean, min, prod, range, sample, sum
library(mosaicData)
library(modelr)
Warning: package ‘modelr’ was built under R version 4.2.2
Attaching package: ‘modelr’

The following object is masked from ‘package:mosaic’:

    resample

The following object is masked from ‘package:ggformula’:

    na.warn

The following object is masked from ‘package:broom’:

    bootstrap
library(relaimpo)
Warning: package ‘relaimpo’ was built under R version 4.2.2Loading required package: MASS

Attaching package: ‘MASS’

The following object is masked from ‘package:dplyr’:

    select

Loading required package: boot

Attaching package: ‘boot’

The following object is masked from ‘package:mosaic’:

    logit

The following object is masked from ‘package:lattice’:

    melanoma

Loading required package: survey
Warning: package ‘survey’ was built under R version 4.2.2Loading required package: grid
Loading required package: survival

Attaching package: ‘survival’

The following object is masked from ‘package:boot’:

    aml

The following object is masked from ‘package:caret’:

    cluster


Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: mitools
Warning: package ‘mitools’ was built under R version 4.2.2This is the global version of package relaimpo.

If you are a non-US user, a version with the interesting additional metric pmvd is available

from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
library(tidyverse)
library(glmulti)
Warning: package ‘glmulti’ was built under R version 4.2.2Loading required package: rJava
Loading required package: leaps
Warning: package ‘leaps’ was built under R version 4.2.2
# Read the raw avocado CSV (path resolved by here::here) and convert the
# column names to snake_case with janitor::clean_names().
avocados <- here::here("weekend/data/avocado.csv") %>%
  read_csv() %>%
  clean_names()
New names:Rows: 18249 Columns: 14── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr   (2): type, region
dbl  (11): ...1, AveragePrice, Total Volume, 4046, 4225, 4770, Total Bags, Small Bags, Large Bags...
date  (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the distinct values of the region column.
distinct(avocados, region)

# Show the "TotalUS" rows for the week of 2015-12-27.
avocados %>%
  filter(region == "TotalUS" & date == "2015-12-27")

# For the week of 2015-12-27, sum total_volume, x4046 and total_bags per type
# over the eight large regions.
avocados %>%
  filter(date == "2015-12-27") %>%
  filter(
    region %in% c(
      "Midsouth", "Northeast", "Plains", "SouthCentral",
      "Southeast", "West", "GreatLakes", "California"
    )
  ) %>%
  group_by(type) %>%
  summarise(across(c(total_volume, x4046, total_bags), sum))

# Same sums for every remaining region, i.e. everything except TotalUS and
# the eight large regions.
avocados %>%
  filter(date == "2015-12-27") %>%
  filter(
    !region %in% c(
      "TotalUS", "Midsouth", "Northeast", "Plains", "SouthCentral",
      "Southeast", "West", "GreatLakes", "California"
    )
  ) %>%
  group_by(type) %>%
  summarise(across(c(total_volume, x4046, total_bags), sum))

# Midsouth, Northeast, Plains, SouthCentral, Southeast, West, GreatLakes, California
# These are the regions that make up TotalUS. The code above shows that their summed category totals are the same as the TotalUS figures.

# I will filter the data so that it only uses the rows for the above regions, to avoid double counting.
# There is no need to worry about the smaller areas, as these are contained within the bigger regions.
# Also, I don't think it makes sense to look at regions that small.
# Also, by the looks of it, the remaining cities aren't an exhaustive list.

# might be good to extract month from date as a categorical
# proportions of 4046, 4225, 4770 may be better than absolutes, also props of bag size may be better than absolute numbers 
# Check whether the three bag-size columns add up to total_bags in the eight
# large regions: `a` is the rounded sum of the bag sizes minus the rounded
# total, and rows are listed from the largest value of `a` downwards.
avocados %>%
  filter(
    region %in% c(
      "Midsouth", "Northeast", "Plains", "SouthCentral",
      "Southeast", "West", "GreatLakes", "California"
    )
  ) %>%
  mutate(
    a = round(small_bags + large_bags + x_large_bags, digits = 2) -
      round(total_bags, digits = 2)
  ) %>%
  arrange(desc(a))

# Print the data for a visual check.
avocados



# Fit average_price on every other column and pass the fit to alias() to look
# for predictors that are exact linear combinations of other predictors.
alias(lm(formula = average_price ~ ., data = avocados))
Model :
average_price ~ x1 + date + total_volume + x4046 + x4225 + x4770 + 
    total_bags + small_bags + large_bags + x_large_bags + type + 
    year + region
# For some reason the bag sizes don't exactly add up to total_bags:
# without the rounding they differ by something like 0.000000000001,
# and with the rounding there are a couple at either end that are out by +-1.
# This means they don't show up in alias(), so I'm going to manually remove x_large_bags, as it can be derived from the other three bag columns.
# x4046, x4225 and x4770 don't add up to total_volume, so they can all be kept.
# Keep only the eight large regions, add a month factor derived from date,
# convert the character columns and year to factors, then drop the unnamed
# first column (x1), the raw date and x_large_bags.
avocados <- avocados %>%
  filter(
    region %in% c(
      "Midsouth", "Northeast", "Plains", "SouthCentral",
      "Southeast", "West", "GreatLakes", "California"
    )
  ) %>%
  mutate(
    month = as.factor(month(date)),
    across(where(is.character), as.factor),
    year = as.factor(year)
  ) %>%
  dplyr::select(-c(x1, date, x_large_bags))
  
# Pairs plot of average_price against every factor column.
# The predicate is wrapped in where(): passing a bare predicate such as
# `is.factor` to select() is deprecated since tidyselect 1.1.0.
avocados %>%
  dplyr::select(average_price, where(is.factor)) %>%
  ggpairs()
Warning: Use of bare predicate functions was deprecated in tidyselect 1.1.0.
Please use wrap predicates in `where()` instead.
# Was:
data %>% select(is.factor)

# Now:
data %>% select(where(is.factor))

# month, type and region all quite strong

# Pairs plot of all numeric columns (average_price included).
# where() replaces the deprecated bare-predicate form select(is.numeric).
avocados %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()
Warning: Use of bare predicate functions was deprecated in tidyselect 1.1.0.
Please use wrap predicates in `where()` instead.
# Was:
data %>% select(is.numeric)

# Now:
data %>% select(where(is.numeric))

# total volume and total bags strongest

# Copy of the data in which small_bags and large_bags are divided by
# total_bags, i.e. stored as shares of the total instead of raw counts.
avocados_prop <- mutate(
  avocados,
  across(c(small_bags, large_bags), ~ .x / total_bags)
)

# Pairs plot of the numeric columns after the bag sizes were converted to
# proportions. where() replaces the deprecated bare predicate.
avocados_prop %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# proportions don't really help
# Random 80/20 train/test split of avocados_prop.
# The seed is fixed so the split, and every model fitted on `train`, is
# reproducible between runs. Any fixed integer works. Results stored from
# earlier unseeded runs will differ from those produced with this seed.
set.seed(2023)

n_data <- nrow(avocados_prop)

# seq_len() is safe when n_data is 0; floor() makes the truncation of the
# fractional test-set size explicit.
test_index <- sample(seq_len(n_data), size = floor(n_data * 0.2))

# Rows drawn for the test set.
test <- avocados_prop %>%
  slice(test_index)

# All remaining rows form the training set.
train <- avocados_prop %>%
  slice(-test_index)
# First model: average_price explained by total_volume alone.
lm1 <- lm(formula = average_price ~ total_volume, data = train)

summary(lm1)

Call:
lm(formula = average_price ~ total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73542 -0.17104 -0.01301  0.14873  0.98879 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.599e+00  7.101e-03  225.20   <2e-16 ***
total_volume -1.061e-07  2.193e-09  -48.41   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2452 on 2162 degrees of freedom
Multiple R-squared:  0.5201,    Adjusted R-squared:  0.5199 
F-statistic:  2343 on 1 and 2162 DF,  p-value: < 2.2e-16
# Plot lm1 with mosaic's plotModel() (presumably the fitted line over the
# data; confirm against the mosaic documentation).
plotModel(lm1)

# Base R diagnostic plots for the fitted lm object.
plot(lm1)


# diagnostics are mad; I think this is because of the organic type
# Residuals of lm1 on the training data. The response and the predictor that
# is already in the model are dropped, and resid is moved to the front, so the
# remaining columns can be screened as candidates for the next term.
avocados_resid <- train %>%
  add_residuals(lm1) %>%
  dplyr::select(resid, everything(), -average_price, -total_volume)


# Residuals against the factor columns.
# where() replaces the deprecated bare predicate select(is.factor).
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# month, type and region all quite strong

# Residuals against the remaining numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Scatter plot of average_price against x4046 on the training data, coloured
# by type, with a straight-line fit (no confidence band) per type.
ggplot(train, aes(x = x4046, y = average_price, colour = type)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Filter by type to check whether price is correlated with different things
# within each type. Conventional avocados first.
# NOTE(review): these plots use the full `avocados` / `avocados_prop` data,
# not `train`, so they include the rows held out for testing.
# where() replaces the deprecated bare predicates select(is.factor) and
# select(is.numeric) throughout.

# average_price against the factor columns.
avocados %>%
  filter(type == "conventional") %>%
  dplyr::select(average_price, where(is.factor)) %>%
  ggpairs()


# month, type and region all quite strong
# NOTE(review): the line above was carried over from the unfiltered plot;
# type is constant within this subset, so re-check it against this plot.

# Numeric columns only.
avocados %>%
  filter(type == "conventional") %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# total volume and total bags strongest

# Numeric columns with the bag sizes expressed as proportions of total_bags.
avocados_prop %>%
  filter(type == "conventional") %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()





# Same three pairs plots for the organic avocados only.
# NOTE(review): these use the full `avocados` / `avocados_prop` data, not
# `train`, so they include the rows held out for testing.
# where() replaces the deprecated bare predicates select(is.factor) and
# select(is.numeric) throughout.

# average_price against the factor columns.
avocados %>%
  filter(type == "organic") %>%
  dplyr::select(average_price, where(is.factor)) %>%
  ggpairs()


# month, type and region all quite strong
# NOTE(review): the line above was carried over from the unfiltered plot;
# type is constant within this subset, so re-check it against this plot.

# Numeric columns only.
avocados %>%
  filter(type == "organic") %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# total volume and total bags strongest

# Numeric columns with the bag sizes expressed as proportions of total_bags.
avocados_prop %>%
  filter(type == "organic") %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()



# proportions don't really help
# going to do type first

# Restart the model build with type as the only predictor.
lm2 <- lm(formula = average_price ~ type, data = train)

summary(lm2)

Call:
lm(formula = average_price ~ type, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.78247 -0.17247 -0.01781  0.15702  0.96753 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.123148   0.007792  144.14   <2e-16 ***
typeorganic 0.489327   0.010994   44.51   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2557 on 2162 degrees of freedom
Multiple R-squared:  0.4781,    Adjusted R-squared:  0.4779 
F-statistic:  1981 on 1 and 2162 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the type-only model.
plot(lm2)

NA
NA
# Residuals of lm2 on the training data. The response and type (already in
# the model) are dropped and resid is moved to the front.
avocados_resid <- train %>%
  add_residuals(lm2) %>%
  dplyr::select(resid, everything(), -average_price, -type)


# Residuals against the remaining factor columns.
# where() replaces the deprecated bare predicate select(is.factor).
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# NOTE(review): the original note here read "month, type and region all quite
# strong", but type is no longer a column of avocados_resid at this point;
# re-check which of the remaining factors stand out in the plot.

# Residuals against the numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Add region to the type-only model.
lm3 <- lm(formula = average_price ~ type + region, data = train)

summary(lm3)

Call:
lm(formula = average_price ~ type + region, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.74699 -0.13834 -0.02354  0.11732  0.99309 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.159535   0.014116  82.142  < 2e-16 ***
typeorganic         0.489463   0.009401  52.064  < 2e-16 ***
regionGreatLakes   -0.072013   0.018815  -3.827 0.000133 ***
regionMidsouth     -0.001411   0.018798  -0.075 0.940167    
regionNortheast     0.194000   0.018869  10.281  < 2e-16 ***
regionPlains        0.031770   0.018763   1.693 0.090557 .  
regionSouthCentral -0.302675   0.018646 -16.233  < 2e-16 ***
regionSoutheast    -0.007822   0.018835  -0.415 0.677976    
regionWest         -0.122087   0.018815  -6.489 1.07e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2186 on 2155 degrees of freedom
Multiple R-squared:  0.6198,    Adjusted R-squared:  0.6184 
F-statistic: 439.2 on 8 and 2155 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the type + region model.
plot(lm3)


# Compare the nested models lm2 (type) and lm3 (type + region) to test
# whether adding region improves the fit.
anova(lm2, lm3)
Analysis of Variance Table

Model 1: average_price ~ type
Model 2: average_price ~ type + region
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1   2162 141.38                                  
2   2155 102.99  7    38.391 114.76 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Residuals of lm3 on the training data. The response and the predictors
# already in the model (type, region) are dropped and resid is moved to the
# front.
avocados_resid <- train %>%
  add_residuals(lm3) %>%
  dplyr::select(resid, everything(), -average_price, -type, -region)


# Residuals against the remaining factor columns.
# where() replaces the deprecated bare predicate select(is.factor).
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# NOTE(review): the original note here read "month, type and region all quite
# strong", but type and region are no longer columns of avocados_resid;
# re-check which of the remaining factors stand out in the plot.

# Residuals against the numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Add month to the type + region model.
lm4 <- lm(formula = average_price ~ type + region + month, data = train)

summary(lm4)

Call:
lm(formula = average_price ~ type + region + month, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.65276 -0.11864 -0.00312  0.10866  0.82500 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.062100   0.017046  62.308  < 2e-16 ***
typeorganic         0.491225   0.008080  60.792  < 2e-16 ***
regionGreatLakes   -0.068198   0.016167  -4.218 2.56e-05 ***
regionMidsouth      0.006574   0.016154   0.407  0.68408    
regionNortheast     0.196708   0.016215  12.131  < 2e-16 ***
regionPlains        0.034712   0.016121   2.153  0.03141 *  
regionSouthCentral -0.303184   0.016021 -18.924  < 2e-16 ***
regionSoutheast    -0.004919   0.016186  -0.304  0.76121    
regionWest         -0.125033   0.016174  -7.731 1.63e-14 ***
month2             -0.055798   0.018249  -3.058  0.00226 ** 
month3             -0.002364   0.018069  -0.131  0.89591    
month4              0.043316   0.019043   2.275  0.02302 *  
month5              0.026449   0.018635   1.419  0.15596    
month6              0.087948   0.019977   4.403 1.12e-05 ***
month7              0.150393   0.018715   8.036 1.52e-15 ***
month8              0.206713   0.019271  10.726  < 2e-16 ***
month9              0.283468   0.019550  14.500  < 2e-16 ***
month10             0.304755   0.018833  16.182  < 2e-16 ***
month11             0.152789   0.019108   7.996 2.08e-15 ***
month12             0.024587   0.019042   1.291  0.19678    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1878 on 2144 degrees of freedom
Multiple R-squared:  0.7209,    Adjusted R-squared:  0.7185 
F-statistic: 291.5 on 19 and 2144 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the type + region + month model.
plot(lm4)


# Compare the nested models lm3 and lm4 to test whether adding month
# improves the fit.
anova(lm3, lm4)
Analysis of Variance Table

Model 1: average_price ~ type + region
Model 2: average_price ~ type + region + month
  Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
1   2155 102.990                                  
2   2144  75.605 11    27.385 70.598 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# log helps diagnostics

# Refit the same predictors with log(average_price) as the response.
# This reuses the name lm4, so the untransformed fit above is overwritten
# from here on.
lm4 <- lm(
  formula = log(average_price) ~ type + region + month,
  data = train
)

summary(lm4)

Call:
lm(formula = log(average_price) ~ type + region + month, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.57536 -0.08972 -0.00062  0.08946  0.44429 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.043982   0.012402   3.546 0.000399 ***
typeorganic         0.368973   0.005879  62.760  < 2e-16 ***
regionGreatLakes   -0.027132   0.011762  -2.307 0.021166 *  
regionMidsouth      0.027394   0.011753   2.331 0.019859 *  
regionNortheast     0.152288   0.011797  12.909  < 2e-16 ***
regionPlains        0.036724   0.011729   3.131 0.001765 ** 
regionSouthCentral -0.236958   0.011657 -20.328  < 2e-16 ***
regionSoutheast     0.009883   0.011776   0.839 0.401447    
regionWest         -0.096932   0.011767  -8.237 3.03e-16 ***
month2             -0.051697   0.013277  -3.894 0.000102 ***
month3              0.003204   0.013147   0.244 0.807467    
month4              0.033313   0.013855   2.404 0.016283 *  
month5              0.012766   0.013558   0.942 0.346503    
month6              0.066478   0.014535   4.574 5.06e-06 ***
month7              0.116553   0.013616   8.560  < 2e-16 ***
month8              0.150148   0.014021  10.709  < 2e-16 ***
month9              0.197583   0.014224  13.891  < 2e-16 ***
month10             0.221953   0.013702  16.198  < 2e-16 ***
month11             0.115575   0.013903   8.313  < 2e-16 ***
month12             0.020065   0.013854   1.448 0.147681    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1366 on 2144 degrees of freedom
Multiple R-squared:  0.7365,    Adjusted R-squared:  0.7342 
F-statistic: 315.5 on 19 and 2144 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the log-response version of lm4.
plot(lm4)

NA

# Residuals of the log-response lm4 on the training data (presumably on the
# log scale, as the model response is log(average_price); confirm against
# modelr::add_residuals). The response and the predictors already in the
# model are dropped and resid is moved to the front.
avocados_resid <- train %>%
  add_residuals(lm4) %>%
  dplyr::select(resid, everything(), -average_price, -type, -region, -month)


# Residuals against the remaining factor columns.
# where() replaces the deprecated bare predicate select(is.factor).
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# NOTE(review): the original note here read "month, type and region all quite
# strong", but those three columns have been removed from avocados_resid;
# re-check this against the plot.

# Residuals against the numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# Add year to the log-response model.
lm5 <- lm(
  formula = log(average_price) ~ type + region + month + year,
  data = train
)

summary(lm5)

Call:
lm(formula = log(average_price) ~ type + region + month + year, 
    data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.62182 -0.07801  0.00214  0.08776  0.38374 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.010441   0.012551   0.832 0.405604    
typeorganic         0.368094   0.005469  67.302  < 2e-16 ***
regionGreatLakes   -0.027873   0.010941  -2.548 0.010913 *  
regionMidsouth      0.026756   0.010933   2.447 0.014476 *  
regionNortheast     0.151809   0.010973  13.834  < 2e-16 ***
regionPlains        0.038806   0.010912   3.556 0.000384 ***
regionSouthCentral -0.235747   0.010844 -21.739  < 2e-16 ***
regionSoutheast     0.011921   0.010956   1.088 0.276685    
regionWest         -0.097710   0.010946  -8.927  < 2e-16 ***
month2             -0.056071   0.012362  -4.536 6.05e-06 ***
month3             -0.001403   0.012255  -0.114 0.908878    
month4              0.043917   0.013118   3.348 0.000829 ***
month5              0.033542   0.012852   2.610 0.009122 ** 
month6              0.081417   0.013738   5.926 3.60e-09 ***
month7              0.129660   0.012899  10.052  < 2e-16 ***
month8              0.168997   0.013277  12.728  < 2e-16 ***
month9              0.210681   0.013457  15.656  < 2e-16 ***
month10             0.233360   0.012978  17.982  < 2e-16 ***
month11             0.130525   0.013170   9.911  < 2e-16 ***
month12             0.031657   0.013118   2.413 0.015895 *  
year2016           -0.029911   0.006979  -4.286 1.90e-05 ***
year2017            0.086228   0.006928  12.446  < 2e-16 ***
year2018            0.090947   0.012529   7.259 5.43e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1271 on 2141 degrees of freedom
Multiple R-squared:  0.7724,    Adjusted R-squared:  0.7701 
F-statistic: 330.3 on 22 and 2141 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the model that includes year.
plot(lm5)


# Compare the nested log-response models lm4 and lm5 to test whether adding
# year improves the fit.
anova(lm4, lm5)
Analysis of Variance Table

Model 1: log(average_price) ~ type + region + month
Model 2: log(average_price) ~ type + region + month + year
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1   2144 40.022                                  
2   2141 34.575  3    5.4472 112.44 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Residuals of lm5 on the training data. The response and the four factor
# predictors already in the model are dropped and resid is moved to the front.
avocados_resid <- train %>%
  add_residuals(lm5) %>%
  dplyr::select(
    resid, everything(),
    -average_price, -type, -region, -month, -year
  )


# where() replaces the deprecated bare predicate select(is.factor).
# NOTE(review): type, region, month and year were all dropped above; if those
# are the only factor columns in `train`, this selection contains just resid.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# NOTE(review): the original note here read "month, type and region all quite
# strong", but none of those columns is part of avocados_resid any more.

# Residuals against the numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Add the first numeric predictor, x4046, to the log-response model.
lm6 <- lm(
  formula = log(average_price) ~ type + region + month + year + x4046,
  data = train
)

summary(lm6)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.59343 -0.07397  0.00511  0.08087  0.37116 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.417e-01  1.489e-02   9.520  < 2e-16 ***
typeorganic         2.667e-01  8.608e-03  30.982  < 2e-16 ***
regionGreatLakes   -8.766e-02  1.118e-02  -7.842 6.95e-15 ***
regionMidsouth     -3.016e-02  1.110e-02  -2.716 0.006656 ** 
regionNortheast     7.905e-02  1.155e-02   6.843 1.01e-11 ***
regionPlains       -1.125e-02  1.093e-02  -1.029 0.303418    
regionSouthCentral -2.070e-01  1.051e-02 -19.695  < 2e-16 ***
regionSoutheast     2.699e-03  1.046e-02   0.258 0.796351    
regionWest         -9.815e-02  1.043e-02  -9.413  < 2e-16 ***
month2             -4.407e-02  1.180e-02  -3.733 0.000194 ***
month3             -1.487e-05  1.168e-02  -0.001 0.998984    
month4              4.874e-02  1.250e-02   3.899 9.97e-05 ***
month5              4.678e-02  1.228e-02   3.810 0.000143 ***
month6              8.599e-02  1.309e-02   6.568 6.38e-11 ***
month7              1.291e-01  1.229e-02  10.508  < 2e-16 ***
month8              1.694e-01  1.265e-02  13.389  < 2e-16 ***
month9              2.014e-01  1.283e-02  15.689  < 2e-16 ***
month10             2.181e-01  1.241e-02  17.578  < 2e-16 ***
month11             1.191e-01  1.257e-02   9.477  < 2e-16 ***
month12             2.312e-02  1.251e-02   1.848 0.064770 .  
year2016           -3.433e-02  6.656e-03  -5.159 2.72e-07 ***
year2017            8.665e-02  6.600e-03  13.129  < 2e-16 ***
year2018            9.686e-02  1.194e-02   8.111 8.38e-16 ***
x4046              -6.759e-08  4.567e-09 -14.800  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1211 on 2140 degrees of freedom
Multiple R-squared:  0.7935,    Adjusted R-squared:  0.7913 
F-statistic: 357.6 on 23 and 2140 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the model that includes x4046.
plot(lm6)

NA

# Residuals of lm6 on the training data. The response and every predictor
# already in the model are dropped and resid is moved to the front.
avocados_resid <- train %>%
  add_residuals(lm6) %>%
  dplyr::select(
    resid, everything(),
    -average_price, -type, -region, -month, -year, -x4046
  )


# Residuals against the remaining numeric columns.
# where() replaces the deprecated bare predicate select(is.numeric).
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()




# Add total_volume to the log-response model.
lm7 <- lm(
  formula = log(average_price) ~ type + region + month + year + x4046 +
    total_volume,
  data = train
)

summary(lm7)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.59290 -0.06734  0.00637  0.07724  0.35848 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.658e-01  1.803e-02  14.741  < 2e-16 ***
typeorganic         1.591e-01  1.254e-02  12.682  < 2e-16 ***
regionGreatLakes   -1.001e-01  1.090e-02  -9.183  < 2e-16 ***
regionMidsouth     -5.575e-02  1.100e-02  -5.067 4.39e-07 ***
regionNortheast     9.375e-02  1.129e-02   8.307  < 2e-16 ***
regionPlains       -6.756e-02  1.168e-02  -5.782 8.48e-09 ***
regionSouthCentral -2.321e-01  1.043e-02 -22.245  < 2e-16 ***
regionSoutheast    -4.491e-02  1.096e-02  -4.098 4.33e-05 ***
regionWest         -9.192e-02  1.014e-02  -9.069  < 2e-16 ***
month2             -3.620e-02  1.148e-02  -3.154 0.001635 ** 
month3             -1.842e-03  1.133e-02  -0.163 0.870920    
month4              4.809e-02  1.213e-02   3.963 7.65e-05 ***
month5              5.242e-02  1.193e-02   4.396 1.16e-05 ***
month6              8.722e-02  1.271e-02   6.863 8.78e-12 ***
month7              1.292e-01  1.193e-02  10.829  < 2e-16 ***
month8              1.620e-01  1.229e-02  13.174  < 2e-16 ***
month9              1.942e-01  1.247e-02  15.567  < 2e-16 ***
month10             2.062e-01  1.209e-02  17.061  < 2e-16 ***
month11             1.079e-01  1.224e-02   8.817  < 2e-16 ***
month12             1.886e-02  1.215e-02   1.552 0.120730    
year2016           -2.258e-02  6.540e-03  -3.453 0.000565 ***
year2017            9.830e-02  6.486e-03  15.156  < 2e-16 ***
year2018            1.182e-01  1.174e-02  10.071  < 2e-16 ***
x4046              -1.456e-08  6.394e-09  -2.277 0.022905 *  
total_volume       -4.551e-08  3.955e-09 -11.507  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1175 on 2139 degrees of freedom
Multiple R-squared:  0.8056,    Adjusted R-squared:  0.8034 
F-statistic: 369.3 on 24 and 2139 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the model that includes total_volume.
plot(lm7)

# Residuals of lm7 on the training data. The response and every predictor
# already in the model are dropped and resid is moved to the front.
avocados_resid <- train %>%
  add_residuals(lm7) %>%
  dplyr::select(
    resid, everything(),
    -average_price, -type, -region, -month, -year, -x4046, -total_volume
  )


# Residuals against the remaining numeric columns.
# where() replaces the deprecated bare predicate select(is.numeric).
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Add large_bags to the log-response model. `train` comes from avocados_prop,
# so large_bags (and small_bags below) are proportions of total_bags here.
lm8 <- lm(
  formula = log(average_price) ~ type + region + month + year + x4046 +
    total_volume + large_bags,
  data = train
)

# Alternative specification: no x4046, but total_bags and both bag-size
# proportions included.
lm8b <- lm(
  formula = log(average_price) ~ 1 + type + year + region + month +
    total_volume + total_bags + small_bags + large_bags,
  data = train
)

summary(lm8)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.53786 -0.06832  0.00604  0.07984  0.38228 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.486e-01  1.795e-02  13.852  < 2e-16 ***
typeorganic         1.811e-01  1.273e-02  14.223  < 2e-16 ***
regionGreatLakes   -5.434e-02  1.239e-02  -4.384 1.22e-05 ***
regionMidsouth     -3.020e-02  1.139e-02  -2.651 0.008097 ** 
regionNortheast     1.069e-01  1.128e-02   9.475  < 2e-16 ***
regionPlains       -4.086e-02  1.208e-02  -3.382 0.000732 ***
regionSouthCentral -2.233e-01  1.037e-02 -21.537  < 2e-16 ***
regionSoutheast    -9.029e-03  1.184e-02  -0.762 0.445942    
regionWest         -3.452e-02  1.262e-02  -2.735 0.006298 ** 
month2             -3.695e-02  1.133e-02  -3.260 0.001133 ** 
month3             -2.796e-03  1.119e-02  -0.250 0.802777    
month4              4.876e-02  1.198e-02   4.069 4.88e-05 ***
month5              5.270e-02  1.178e-02   4.475 8.04e-06 ***
month6              8.732e-02  1.255e-02   6.959 4.55e-12 ***
month7              1.282e-01  1.178e-02  10.884  < 2e-16 ***
month8              1.566e-01  1.216e-02  12.874  < 2e-16 ***
month9              1.927e-01  1.232e-02  15.643  < 2e-16 ***
month10             2.065e-01  1.193e-02  17.300  < 2e-16 ***
month11             1.094e-01  1.209e-02   9.054  < 2e-16 ***
month12             2.001e-02  1.200e-02   1.668 0.095498 .  
year2016           -1.371e-02  6.567e-03  -2.088 0.036908 *  
year2017            1.033e-01  6.439e-03  16.040  < 2e-16 ***
year2018            1.171e-01  1.159e-02  10.097  < 2e-16 ***
x4046              -9.166e-09  6.355e-09  -1.442 0.149394    
total_volume       -4.477e-08  3.907e-09 -11.459  < 2e-16 ***
large_bags         -1.268e-01  1.700e-02  -7.459 1.26e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.116 on 2138 degrees of freedom
Multiple R-squared:  0.8105,    Adjusted R-squared:  0.8083 
F-statistic: 365.8 on 25 and 2138 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for lm8.
plot(lm8)

# Coefficient table and fit statistics for the alternative model lm8b.
summary(lm8b)

Call:
lm(formula = log(average_price) ~ 1 + type + year + region + 
    month + total_volume + total_bags + small_bags + large_bags, 
    data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.53155 -0.06898  0.00640  0.07745  0.38245 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.087e+00  1.895e-01   5.737 1.10e-08 ***
typeorganic         1.895e-01  1.297e-02  14.612  < 2e-16 ***
year2016           -2.484e-02  6.945e-03  -3.577 0.000356 ***
year2017            8.886e-02  7.120e-03  12.480  < 2e-16 ***
year2018            9.844e-02  1.225e-02   8.039 1.49e-15 ***
regionGreatLakes   -6.579e-02  1.233e-02  -5.334 1.06e-07 ***
regionMidsouth     -2.953e-02  1.142e-02  -2.585 0.009798 ** 
regionNortheast     1.159e-01  1.049e-02  11.044  < 2e-16 ***
regionPlains       -4.236e-02  1.205e-02  -3.516 0.000447 ***
regionSouthCentral -2.226e-01  9.986e-03 -22.290  < 2e-16 ***
regionSoutheast    -1.606e-02  1.175e-02  -1.367 0.171841    
regionWest         -4.054e-02  1.296e-02  -3.129 0.001778 ** 
month2             -3.789e-02  1.126e-02  -3.366 0.000777 ***
month3             -6.334e-03  1.113e-02  -0.569 0.569436    
month4              4.466e-02  1.192e-02   3.747 0.000184 ***
month5              4.934e-02  1.171e-02   4.214 2.62e-05 ***
month6              7.738e-02  1.259e-02   6.145 9.50e-10 ***
month7              1.165e-01  1.191e-02   9.783  < 2e-16 ***
month8              1.506e-01  1.211e-02  12.434  < 2e-16 ***
month9              1.895e-01  1.225e-02  15.475  < 2e-16 ***
month10             2.035e-01  1.187e-02  17.136  < 2e-16 ***
month11             1.053e-01  1.204e-02   8.747  < 2e-16 ***
month12             1.358e-02  1.198e-02   1.134 0.257117    
total_volume       -5.832e-08  3.857e-09 -15.120  < 2e-16 ***
total_bags          3.612e-08  9.623e-09   3.754 0.000179 ***
small_bags         -8.341e-01  1.915e-01  -4.356 1.39e-05 ***
large_bags         -9.535e-01  1.908e-01  -4.996 6.32e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1153 on 2137 degrees of freedom
Multiple R-squared:  0.8131,    Adjusted R-squared:  0.8109 
F-statistic: 357.7 on 26 and 2137 DF,  p-value: < 2.2e-16
# Base R diagnostic plots for the alternative model lm8b.
plot(lm8b)

# Residuals of lm8 (not lm8b) on the training data. The response and every
# predictor already in lm8 are dropped and resid is moved to the front.
avocados_resid <- train %>%
  add_residuals(lm8) %>%
  dplyr::select(
    resid, everything(),
    -average_price, -type, -region, -month, -year,
    -x4046, -total_volume, -large_bags
  )


# Residuals against the remaining numeric columns.
# where() replaces the deprecated bare predicate select(is.numeric).
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# NOTE(review): this formula is identical to the one used for lm8, so lm9 is
# a refit of the same model. Confirm whether an additional term was intended.
lm9 <- lm(
  formula = log(average_price) ~ type + region + month + year + x4046 +
    total_volume + large_bags,
  data = train
)

summary(lm9)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.53786 -0.06832  0.00604  0.07984  0.38228 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.486e-01  1.795e-02  13.852  < 2e-16 ***
typeorganic         1.811e-01  1.273e-02  14.223  < 2e-16 ***
regionGreatLakes   -5.434e-02  1.239e-02  -4.384 1.22e-05 ***
regionMidsouth     -3.020e-02  1.139e-02  -2.651 0.008097 ** 
regionNortheast     1.069e-01  1.128e-02   9.475  < 2e-16 ***
regionPlains       -4.086e-02  1.208e-02  -3.382 0.000732 ***
regionSouthCentral -2.233e-01  1.037e-02 -21.537  < 2e-16 ***
regionSoutheast    -9.029e-03  1.184e-02  -0.762 0.445942    
regionWest         -3.452e-02  1.262e-02  -2.735 0.006298 ** 
month2             -3.695e-02  1.133e-02  -3.260 0.001133 ** 
month3             -2.796e-03  1.119e-02  -0.250 0.802777    
month4              4.876e-02  1.198e-02   4.069 4.88e-05 ***
month5              5.270e-02  1.178e-02   4.475 8.04e-06 ***
month6              8.732e-02  1.255e-02   6.959 4.55e-12 ***
month7              1.282e-01  1.178e-02  10.884  < 2e-16 ***
month8              1.566e-01  1.216e-02  12.874  < 2e-16 ***
month9              1.927e-01  1.232e-02  15.643  < 2e-16 ***
month10             2.065e-01  1.193e-02  17.300  < 2e-16 ***
month11             1.094e-01  1.209e-02   9.054  < 2e-16 ***
month12             2.001e-02  1.200e-02   1.668 0.095498 .  
year2016           -1.371e-02  6.567e-03  -2.088 0.036908 *  
year2017            1.033e-01  6.439e-03  16.040  < 2e-16 ***
year2018            1.171e-01  1.159e-02  10.097  < 2e-16 ***
x4046              -9.166e-09  6.355e-09  -1.442 0.149394    
total_volume       -4.477e-08  3.907e-09 -11.459  < 2e-16 ***
large_bags         -1.268e-01  1.700e-02  -7.459 1.26e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.116 on 2138 degrees of freedom
Multiple R-squared:  0.8105,    Adjusted R-squared:  0.8083 
F-statistic: 365.8 on 25 and 2138 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for lm9.
plot(lm9)

# lm10: the lm9 terms plus a type:total_volume interaction, fitted on `train`.
lm10 <- lm(
  formula = log(average_price) ~ type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume,
  data = train
)

# Coefficient table and fit statistics for lm10.
summary(lm10)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags + type:total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.36543 -0.06091  0.00252  0.06397  0.68320 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               3.783e-01  1.609e-02  23.509  < 2e-16 ***
typeorganic               2.288e-01  1.106e-02  20.689  < 2e-16 ***
regionGreatLakes         -1.098e-01  1.082e-02 -10.146  < 2e-16 ***
regionMidsouth           -1.102e-01  1.019e-02 -10.812  < 2e-16 ***
regionNortheast           1.014e-01  9.681e-03  10.479  < 2e-16 ***
regionPlains             -1.938e-01  1.174e-02 -16.504  < 2e-16 ***
regionSouthCentral       -2.785e-01  9.115e-03 -30.557  < 2e-16 ***
regionSoutheast          -1.407e-01  1.122e-02 -12.547  < 2e-16 ***
regionWest               -3.122e-03  1.089e-02  -0.287 0.774343    
month2                   -1.405e-02  9.758e-03  -1.440 0.150073    
month3                    2.592e-02  9.657e-03   2.684 0.007335 ** 
month4                    7.629e-02  1.033e-02   7.388 2.13e-13 ***
month5                    8.944e-02  1.019e-02   8.778  < 2e-16 ***
month6                    1.113e-01  1.080e-02  10.302  < 2e-16 ***
month7                    1.439e-01  1.012e-02  14.220  < 2e-16 ***
month8                    1.640e-01  1.044e-02  15.716  < 2e-16 ***
month9                    1.920e-01  1.057e-02  18.172  < 2e-16 ***
month10                   1.955e-01  1.025e-02  19.078  < 2e-16 ***
month11                   9.824e-02  1.038e-02   9.466  < 2e-16 ***
month12                   2.400e-02  1.029e-02   2.331 0.019820 *  
year2016                  2.060e-02  5.768e-03   3.571 0.000363 ***
year2017                  1.636e-01  5.937e-03  27.559  < 2e-16 ***
year2018                  2.257e-01  1.069e-02  21.113  < 2e-16 ***
x4046                     2.991e-08  5.631e-09   5.311 1.20e-07 ***
total_volume             -9.027e-08  3.732e-09 -24.189  < 2e-16 ***
large_bags               -6.960e-02  1.473e-02  -4.726 2.44e-06 ***
typeorganic:total_volume -1.514e-06  5.461e-08 -27.720  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09954 on 2137 degrees of freedom
Multiple R-squared:  0.8606,    Adjusted R-squared:  0.8589 
F-statistic: 507.5 on 26 and 2137 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for lm10.
plot(lm10)

# lm11: the lm10 terms plus a type:region interaction.
lm11 <- lm(
  formula = log(average_price) ~ type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume + type:region,
  data = train
)

# lm12: the lm11 terms plus type:year and region:total_volume interactions.
lm12 <- lm(
  formula = log(average_price) ~ type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume + type:region + type:year + region:total_volume,
  data = train
)

# Only lm11 is summarised here; lm12 is compared with it via glance() below.
summary(lm11)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags + type:total_volume + type:region, 
    data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.31319 -0.05543  0.00383  0.05520  0.73756 

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                     5.338e-01  2.367e-02  22.546  < 2e-16 ***
typeorganic                     7.485e-02  2.454e-02   3.050  0.00232 ** 
regionGreatLakes               -1.521e-01  1.697e-02  -8.962  < 2e-16 ***
regionMidsouth                 -1.975e-01  1.675e-02 -11.789  < 2e-16 ***
regionNortheast                 3.109e-02  1.913e-02   1.625  0.10423    
regionPlains                   -3.585e-01  1.848e-02 -19.405  < 2e-16 ***
regionSouthCentral             -2.141e-01  1.352e-02 -15.836  < 2e-16 ***
regionSoutheast                -1.235e-01  1.551e-02  -7.962 2.73e-15 ***
regionWest                     -2.386e-02  1.191e-02  -2.004  0.04522 *  
month2                         -8.440e-03  8.785e-03  -0.961  0.33682    
month3                          2.716e-02  8.595e-03   3.161  0.00160 ** 
month4                          7.934e-02  9.420e-03   8.423  < 2e-16 ***
month5                          9.274e-02  9.491e-03   9.771  < 2e-16 ***
month6                          1.093e-01  9.581e-03  11.408  < 2e-16 ***
month7                          1.409e-01  9.426e-03  14.950  < 2e-16 ***
month8                          1.524e-01  9.400e-03  16.211  < 2e-16 ***
month9                          1.735e-01  9.690e-03  17.899  < 2e-16 ***
month10                         1.677e-01  9.480e-03  17.684  < 2e-16 ***
month11                         9.017e-02  9.611e-03   9.382  < 2e-16 ***
month12                         8.005e-03  9.404e-03   0.851  0.39475    
year2016                        3.749e-02  5.388e-03   6.959 4.56e-12 ***
year2017                        1.765e-01  5.507e-03  32.056  < 2e-16 ***
year2018                        2.355e-01  9.641e-03  24.426  < 2e-16 ***
x4046                           3.031e-09  9.240e-09   0.328  0.74296    
total_volume                   -1.029e-07  4.703e-09 -21.882  < 2e-16 ***
large_bags                     -2.036e-01  1.540e-02 -13.223  < 2e-16 ***
typeorganic:total_volume       -1.574e-06  5.197e-08 -30.280  < 2e-16 ***
typeorganic:regionGreatLakes    9.036e-02  2.020e-02   4.474 8.09e-06 ***
typeorganic:regionMidsouth      1.277e-01  2.008e-02   6.357 2.51e-10 ***
typeorganic:regionNortheast     7.221e-02  2.194e-02   3.291  0.00101 ** 
typeorganic:regionPlains        2.662e-01  2.190e-02  12.155  < 2e-16 ***
typeorganic:regionSouthCentral -8.674e-02  1.753e-02  -4.947 8.12e-07 ***
typeorganic:regionSoutheast    -1.484e-02  1.879e-02  -0.790  0.42979    
typeorganic:regionWest          1.707e-01  1.667e-02  10.242  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09041 on 2130 degrees of freedom
Multiple R-squared:  0.8822,    Adjusted R-squared:  0.8804 
F-statistic: 483.4 on 33 and 2130 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for lm11.
plot(lm11)

# Model-level fit statistics for lm11 and lm12, printed one after the other so
# the two fits can be compared.
glance(lm11)
glance(lm12)
# Root-mean-squared residual of lm12 over the rows of `train`.
# lm12's response is log(average_price), so (assuming add_residuals() works on
# the model's response scale - confirm) this RMSE is in log-price units.
train_rmse <- train %>%
  add_residuals(lm12) %>%
  summarise(rmse = mean(resid^2)^0.5) %>%
  pull(rmse)


train_rmse
[1] 0.08437852
# Predictions and residuals of lm12 on the rows of `test`; only the observed
# price, the prediction and the residual are retained.
# Note: lm12 models log(average_price), so `pred` is a predicted log price and
# is not on the same scale as the `average_price` column kept beside it.
predictions_test <- test %>%
  add_predictions(lm12) %>%
  add_residuals(lm12) %>%
  dplyr::select(average_price, pred, resid)

# RMSE of lm12 on `test`, computed like train_rmse.
# NOTE(review): `test_rsme` looks like a typo for `test_rmse`; the name is
# left unchanged in case it is referenced elsewhere in the file.
test_rsme <- predictions_test %>%
  summarise(rmse = mean(resid^2)^0.5) %>%
  pull(rmse)

# Ratio of test RMSE to training RMSE.
test_rsme / train_rmse
[1] 0.9530149
# Resampling settings: 10-fold cross-validation, keeping the per-fold
# predictions.
cv_10_fold <- trainControl(method = "cv", number = 10, savePredictions = TRUE)

# Cross-validate a linear model with the same formula as lm10 on the
# `avocados_prop` data.
model2 <- train(
  log(average_price) ~ type + region + month + year +
    x4046 + total_volume + large_bags + type:total_volume,
  avocados_prop,
  trControl = cv_10_fold,
  method = "lm"
)

# Average R-squared and RMSE across the resamples.
model2$resample %>%
  summarise(
    av_r2 = mean(Rsquared),
    av_rmse = mean(RMSE)
  )
# Resampling settings: 10-fold cross-validation, keeping the per-fold
# predictions (same settings as the previous run).
cv_10_fold <- trainControl(method = "cv", number = 10, savePredictions = TRUE)

# Cross-validate a linear model with the same formula as lm12 on the
# `avocados_prop` data. This overwrites the `model2` object from the previous
# run.
model2 <- train(
  log(average_price) ~ type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume + type:region + type:year + region:total_volume,
  avocados_prop,
  trControl = cv_10_fold,
  method = "lm"
)

# Average R-squared and RMSE across the resamples.
model2$resample %>%
  summarise(
    av_r2 = mean(Rsquared),
    av_rmse = mean(RMSE)
  )
# Relative importance of lm9's regressors using the "lmg" metric.
# rela = TRUE normalises the shares so that they sum to 100%.
calc.relimp(lm9, type = "lmg", rela = TRUE)
Response variable: log(average_price) 
Total response variance: 0.07023302 
Analysis based on 2164 observations 

25 Regressors: 
Some regressors combined in groups: 
        Group  region : regionGreatLakes regionMidsouth regionNortheast regionPlains regionSouthCentral regionSoutheast regionWest 
        Group  month : month2 month3 month4 month5 month6 month7 month8 month9 month10 month11 month12 
        Group  year : year2016 year2017 year2018 

 Relative importance of 7 (groups of) regressors assessed: 
 region month year type x4046 total_volume large_bags 
 
Proportion of variance explained by model: 81.05%
Metrics are normalized to sum to 100% (rela=TRUE). 

Relative importance metrics: 

                     lmg
region       0.134641265
month        0.095970421
year         0.048764397
type         0.226128036
x4046        0.226597172
total_volume 0.261021543
large_bags   0.006877166

Average coefficients for different model sizes: 

                          1group       2groups       3groups       4groups       5groups
type                3.678353e-01  2.947586e-01  2.436647e-01  2.119444e-01  1.952180e-01
regionGreatLakes   -2.975501e-02 -8.130292e-02 -1.008357e-01 -9.960861e-02 -8.664705e-02
regionMidsouth      2.099359e-02 -2.820968e-02 -5.283919e-02 -5.923284e-02 -5.349555e-02
regionNortheast     1.442123e-01  9.997769e-02  7.877882e-02  7.564505e-02  8.432104e-02
regionPlains        3.675087e-02 -1.894309e-02 -5.004991e-02 -6.243166e-02 -6.147231e-02
regionSouthCentral -2.418414e-01 -2.290649e-01 -2.213823e-01 -2.190153e-01 -2.205260e-01
regionSoutheast    -2.309713e-03 -2.357879e-02 -3.084214e-02 -3.041651e-02 -2.584134e-02
regionWest         -9.498219e-02 -1.033868e-01 -9.278081e-02 -7.559419e-02 -5.887311e-02
month2             -5.322182e-02 -4.215024e-02 -3.489426e-02 -3.177629e-02 -3.219225e-02
month3              2.336431e-02  1.566266e-02  1.008000e-02  5.845023e-03  2.510236e-03
month4              4.610019e-02  4.461026e-02  4.471396e-02  4.574120e-02  4.695995e-02
month5              9.289308e-03  2.413591e-02  3.603635e-02  4.431730e-02  4.895355e-02
month6              8.095504e-02  7.946350e-02  8.010731e-02  8.194235e-02  8.398654e-02
month7              1.096422e-01  1.128397e-01  1.159579e-01  1.192298e-01  1.224712e-01
month8              1.477032e-01  1.483213e-01  1.495314e-01  1.511501e-01  1.530281e-01
month9              1.953311e-01  1.895520e-01  1.861248e-01  1.854024e-01  1.870272e-01
month10             2.185677e-01  2.077589e-01  2.008506e-01  1.984080e-01  1.997708e-01
month11             1.220613e-01  1.095958e-01  1.025676e-01  1.005300e-01  1.022746e-01
month12             2.273399e-02  1.494875e-02  1.076900e-02  1.031215e-02  1.268258e-02
year2016           -3.709716e-02 -3.284356e-02 -2.870411e-02 -2.440112e-02 -2.031367e-02
year2017            8.598713e-02  9.019967e-02  9.426022e-02  9.788880e-02  1.005768e-01
year2018           -8.395387e-03  2.183752e-02  4.779110e-02  6.934389e-02  8.731159e-02
x4046              -1.796573e-07 -1.525806e-07 -1.217385e-07 -8.902625e-08 -5.716638e-08
total_volume       -8.333252e-08 -7.587114e-08 -6.872221e-08 -6.185503e-08 -5.535688e-08
large_bags          1.270429e-03 -3.761072e-02 -7.239215e-02 -9.810363e-02 -1.141962e-01
                         6groups       7groups
type                1.873184e-01  1.810948e-01
regionGreatLakes   -6.963509e-02 -5.433530e-02
regionMidsouth     -4.189492e-02 -3.019837e-02
regionNortheast     9.728842e-02  1.069021e-01
regionPlains       -5.260654e-02 -4.085744e-02
regionSouthCentral -2.231349e-01 -2.233071e-01
regionSoutheast    -1.873145e-02 -9.028850e-03
regionWest         -4.549297e-02 -3.452304e-02
month2             -3.468319e-02 -3.694705e-02
month3             -2.772264e-04 -2.795779e-03
month4              4.793720e-02  4.876221e-02
month5              5.097630e-02  5.270201e-02
month6              8.569976e-02  8.732103e-02
month7              1.254806e-01  1.282013e-01
month8              1.549898e-01  1.565626e-01
month9              1.900012e-01  1.927043e-01
month10             2.032871e-01  2.064749e-01
month11             1.059677e-01  1.094442e-01
month12             1.643131e-02  2.000957e-02
year2016           -1.673547e-02 -1.371257e-02
year2017            1.022139e-01  1.032881e-01
year2018            1.029213e-01  1.170627e-01
x4046              -2.950483e-08 -9.165685e-09
total_volume       -4.949132e-08 -4.476689e-08
large_bags         -1.225210e-01 -1.268101e-01

AUTOMATED


#### GARBAGE

# Forward subset selection: average_price (untransformed) against every other
# column of avocados_prop, considering models of up to 40 terms.
regsubset_forwards <- regsubsets(
  average_price ~ .,
  data = avocados_prop,
  nvmax = 40,
  method = "forward"
)

# Show which terms enter at each model size.
summary(regsubset_forwards)
Subset selection object
Call: regsubsets.formula(average_price ~ ., data = avocados_prop, nvmax = 40, 
    method = "forward")
29 Variables  (and intercept)
                   Forced in Forced out
total_volume           FALSE      FALSE
x4046                  FALSE      FALSE
x4225                  FALSE      FALSE
x4770                  FALSE      FALSE
total_bags             FALSE      FALSE
small_bags             FALSE      FALSE
large_bags             FALSE      FALSE
typeorganic            FALSE      FALSE
year2016               FALSE      FALSE
year2017               FALSE      FALSE
year2018               FALSE      FALSE
regionGreatLakes       FALSE      FALSE
regionMidsouth         FALSE      FALSE
regionNortheast        FALSE      FALSE
regionPlains           FALSE      FALSE
regionSouthCentral     FALSE      FALSE
regionSoutheast        FALSE      FALSE
regionWest             FALSE      FALSE
month2                 FALSE      FALSE
month3                 FALSE      FALSE
month4                 FALSE      FALSE
month5                 FALSE      FALSE
month6                 FALSE      FALSE
month7                 FALSE      FALSE
month8                 FALSE      FALSE
month9                 FALSE      FALSE
month10                FALSE      FALSE
month11                FALSE      FALSE
month12                FALSE      FALSE
1 subsets of each size up to 29
Selection Algorithm: forward
          total_volume x4046 x4225 x4770 total_bags small_bags large_bags typeorganic year2016
1  ( 1 )  "*"          " "   " "   " "   " "        " "        " "        " "         " "     
2  ( 1 )  "*"          " "   " "   " "   " "        " "        " "        " "         " "     
3  ( 1 )  "*"          " "   " "   " "   " "        " "        " "        " "         " "     
4  ( 1 )  "*"          " "   " "   " "   " "        " "        " "        " "         " "     
5  ( 1 )  "*"          " "   " "   " "   " "        " "        " "        "*"         " "     
6  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
7  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
8  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
9  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
10  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
11  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
12  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
13  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
14  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"        "*"         " "     
15  ( 1 ) "*"          " "   "*"   " "   " "        " "        "*"        "*"         " "     
16  ( 1 ) "*"          " "   "*"   " "   " "        " "        "*"        "*"         " "     
17  ( 1 ) "*"          " "   "*"   " "   " "        " "        "*"        "*"         " "     
18  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         " "     
19  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         " "     
20  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         "*"     
21  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         "*"     
22  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         "*"     
23  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         "*"     
24  ( 1 ) "*"          " "   "*"   " "   " "        "*"        "*"        "*"         "*"     
25  ( 1 ) "*"          "*"   "*"   " "   " "        "*"        "*"        "*"         "*"     
26  ( 1 ) "*"          "*"   "*"   "*"   " "        "*"        "*"        "*"         "*"     
27  ( 1 ) "*"          "*"   "*"   "*"   "*"        "*"        "*"        "*"         "*"     
28  ( 1 ) "*"          "*"   "*"   "*"   "*"        "*"        "*"        "*"         "*"     
29  ( 1 ) "*"          "*"   "*"   "*"   "*"        "*"        "*"        "*"         "*"     
          year2017 year2018 regionGreatLakes regionMidsouth regionNortheast regionPlains
1  ( 1 )  " "      " "      " "              " "            " "             " "         
2  ( 1 )  " "      " "      " "              " "            "*"             " "         
3  ( 1 )  "*"      " "      " "              " "            "*"             " "         
4  ( 1 )  "*"      " "      " "              " "            "*"             " "         
5  ( 1 )  "*"      " "      " "              " "            "*"             " "         
6  ( 1 )  "*"      " "      " "              " "            "*"             " "         
7  ( 1 )  "*"      " "      " "              " "            "*"             " "         
8  ( 1 )  "*"      " "      " "              " "            "*"             " "         
9  ( 1 )  "*"      " "      " "              " "            "*"             " "         
10  ( 1 ) "*"      " "      " "              " "            "*"             " "         
11  ( 1 ) "*"      " "      " "              " "            "*"             " "         
12  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
13  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
14  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
15  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
16  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
17  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
18  ( 1 ) "*"      "*"      " "              " "            "*"             " "         
19  ( 1 ) "*"      "*"      "*"              " "            "*"             " "         
20  ( 1 ) "*"      "*"      "*"              " "            "*"             " "         
21  ( 1 ) "*"      "*"      "*"              " "            "*"             " "         
22  ( 1 ) "*"      "*"      "*"              " "            "*"             " "         
23  ( 1 ) "*"      "*"      "*"              " "            "*"             "*"         
24  ( 1 ) "*"      "*"      "*"              "*"            "*"             "*"         
25  ( 1 ) "*"      "*"      "*"              "*"            "*"             "*"         
26  ( 1 ) "*"      "*"      "*"              "*"            "*"             "*"         
27  ( 1 ) "*"      "*"      "*"              "*"            "*"             "*"         
28  ( 1 ) "*"      "*"      "*"              "*"            "*"             "*"         
29  ( 1 ) "*"      "*"      "*"              "*"            "*"             "*"         
          regionSouthCentral regionSoutheast regionWest month2 month3 month4 month5 month6 month7
1  ( 1 )  " "                " "             " "        " "    " "    " "    " "    " "    " "   
2  ( 1 )  " "                " "             " "        " "    " "    " "    " "    " "    " "   
3  ( 1 )  " "                " "             " "        " "    " "    " "    " "    " "    " "   
4  ( 1 )  "*"                " "             " "        " "    " "    " "    " "    " "    " "   
5  ( 1 )  "*"                " "             " "        " "    " "    " "    " "    " "    " "   
6  ( 1 )  "*"                " "             " "        " "    " "    " "    " "    " "    " "   
7  ( 1 )  "*"                " "             " "        " "    " "    " "    " "    " "    " "   
8  ( 1 )  "*"                " "             " "        " "    " "    " "    " "    " "    " "   
9  ( 1 )  "*"                " "             " "        " "    " "    " "    " "    " "    " "   
10  ( 1 ) "*"                " "             " "        " "    " "    " "    " "    " "    "*"   
11  ( 1 ) "*"                " "             " "        " "    " "    " "    " "    " "    "*"   
12  ( 1 ) "*"                " "             " "        " "    " "    " "    " "    " "    "*"   
13  ( 1 ) "*"                " "             " "        " "    " "    " "    " "    "*"    "*"   
14  ( 1 ) "*"                " "             " "        "*"    " "    " "    " "    "*"    "*"   
15  ( 1 ) "*"                " "             " "        "*"    " "    " "    " "    "*"    "*"   
16  ( 1 ) "*"                " "             " "        "*"    " "    " "    "*"    "*"    "*"   
17  ( 1 ) "*"                " "             " "        "*"    " "    "*"    "*"    "*"    "*"   
18  ( 1 ) "*"                " "             " "        "*"    " "    "*"    "*"    "*"    "*"   
19  ( 1 ) "*"                " "             " "        "*"    " "    "*"    "*"    "*"    "*"   
20  ( 1 ) "*"                " "             " "        "*"    " "    "*"    "*"    "*"    "*"   
21  ( 1 ) "*"                " "             " "        "*"    " "    "*"    "*"    "*"    "*"   
22  ( 1 ) "*"                " "             "*"        "*"    " "    "*"    "*"    "*"    "*"   
23  ( 1 ) "*"                " "             "*"        "*"    " "    "*"    "*"    "*"    "*"   
24  ( 1 ) "*"                " "             "*"        "*"    " "    "*"    "*"    "*"    "*"   
25  ( 1 ) "*"                " "             "*"        "*"    " "    "*"    "*"    "*"    "*"   
26  ( 1 ) "*"                " "             "*"        "*"    " "    "*"    "*"    "*"    "*"   
27  ( 1 ) "*"                " "             "*"        "*"    " "    "*"    "*"    "*"    "*"   
28  ( 1 ) "*"                "*"             "*"        "*"    " "    "*"    "*"    "*"    "*"   
29  ( 1 ) "*"                "*"             "*"        "*"    "*"    "*"    "*"    "*"    "*"   
          month8 month9 month10 month11 month12
1  ( 1 )  " "    " "    " "     " "     " "    
2  ( 1 )  " "    " "    " "     " "     " "    
3  ( 1 )  " "    " "    " "     " "     " "    
4  ( 1 )  " "    " "    " "     " "     " "    
5  ( 1 )  " "    " "    " "     " "     " "    
6  ( 1 )  " "    " "    " "     " "     " "    
7  ( 1 )  " "    " "    "*"     " "     " "    
8  ( 1 )  " "    "*"    "*"     " "     " "    
9  ( 1 )  "*"    "*"    "*"     " "     " "    
10  ( 1 ) "*"    "*"    "*"     " "     " "    
11  ( 1 ) "*"    "*"    "*"     "*"     " "    
12  ( 1 ) "*"    "*"    "*"     "*"     " "    
13  ( 1 ) "*"    "*"    "*"     "*"     " "    
14  ( 1 ) "*"    "*"    "*"     "*"     " "    
15  ( 1 ) "*"    "*"    "*"     "*"     " "    
16  ( 1 ) "*"    "*"    "*"     "*"     " "    
17  ( 1 ) "*"    "*"    "*"     "*"     " "    
18  ( 1 ) "*"    "*"    "*"     "*"     " "    
19  ( 1 ) "*"    "*"    "*"     "*"     " "    
20  ( 1 ) "*"    "*"    "*"     "*"     " "    
21  ( 1 ) "*"    "*"    "*"     "*"     "*"    
22  ( 1 ) "*"    "*"    "*"     "*"     "*"    
23  ( 1 ) "*"    "*"    "*"     "*"     "*"    
24  ( 1 ) "*"    "*"    "*"     "*"     "*"    
25  ( 1 ) "*"    "*"    "*"     "*"     "*"    
26  ( 1 ) "*"    "*"    "*"     "*"     "*"    
27  ( 1 ) "*"    "*"    "*"     "*"     "*"    
28  ( 1 ) "*"    "*"    "*"     "*"     "*"    
29  ( 1 ) "*"    "*"    "*"     "*"     "*"    
# Plot the forward-selection result with the models scaled by BIC.
plot(regsubset_forwards,
     scale = "bic")


# Keep the summary object so its per-model statistics can be accessed.
sum_forward <- summary(regsubset_forwards)

# BIC of each model in selection order (one model per subset size),
# drawn as points joined by lines.
plot(sum_forward$bic,
     type = "b")

# glmulti


# Size up an automated model search with glmulti over `train`.
# With method = "d" this is only a diagnostic run that reports how large the
# candidate set is; no models are fitted or ranked.
glmulti_fit <- glmulti(
  log(average_price) ~ ., # model to fit: log(average_price) against every other column
  level = 2, # level = 2 means try pairwise interactions. level = 1 means main effects only
  data = train, # data to use for fitting
  minsize = 0, # min size of model to try, in number of predictors
  maxsize = -1, # max size to try, set to -1 for unlimited
  marginality = TRUE, # marginality TRUE means include a pairwise interaction only if both main effects are present in the model
  method = "d", # method "d" means trial run, to get size of problem. Set to "h" for exhaustive search, or "g" for genetic algorithm
  confsetsize = 10, # how many models should glmulti() return? Must be less than total size of problem
  plotty = FALSE, # no progress plots
  report = TRUE, # print progress reports
  fitfunction = lm, # use lm() as fit function. Can also use glm() for logistic regression.
  crit = aic # criterion for selecting best models. 
)
Initialization...
TASK: Diagnostic of candidate set.
Sample size: 2164
4 factor(s).
7 covariate(s).
0 f exclusion(s).
0 c exclusion(s).
0 f:f exclusion(s).
0 c:c exclusion(s).
0 f:c exclusion(s).
Size constraints: min =  0 max = -1
Complexity constraints: min =  0 max = -1
Marginality rule.
Your candidate set contains more than 1 billion (1e9) models.
# lm_multi: large model with all main effects and a hand-listed set of
# pairwise interactions, fitted on `train`. The formula layout suggests it was
# pasted from an automated search result (presumably glmulti - confirm).
# Term order is kept exactly as written; only the layout is changed.
# NOTE(review): the summary reports coefficients "not defined because of
# singularities", so some of these terms are linearly dependent.
lm_multi <- lm(
  formula = log(average_price) ~ 1 +
    # main effects
    type + year + region + month +
    total_volume + x4046 + x4225 + x4770 +
    total_bags + small_bags + large_bags +
    # interactions among type, year, region and month
    year:type + region:type + region:year +
    month:type + month:year + month:region +
    # interactions among the numeric columns
    x4046:total_volume + x4225:total_volume +
    total_bags:x4225 + total_bags:x4770 +
    small_bags:total_volume + small_bags:x4046 + small_bags:x4225 +
    small_bags:x4770 + small_bags:total_bags +
    large_bags:total_volume + large_bags:small_bags +
    # type, year, region and month crossed with numeric columns
    type:total_volume + type:x4225 + type:total_bags +
    year:total_volume + year:x4046 + year:x4770 +
    year:total_bags + year:large_bags +
    region:total_volume + region:x4225 +
    region:total_bags + region:large_bags +
    month:total_volume + month:x4770 + month:small_bags,
  data = train
)

# Coefficient table and fit statistics for lm_multi.
summary(lm_multi)

Call:
lm(formula = log(average_price) ~ 1 + type + year + region + 
    month + total_volume + x4046 + x4225 + x4770 + total_bags + 
    small_bags + large_bags + year:type + region:type + region:year + 
    month:type + month:year + month:region + x4046:total_volume + 
    x4225:total_volume + total_bags:x4225 + total_bags:x4770 + 
    small_bags:total_volume + small_bags:x4046 + small_bags:x4225 + 
    small_bags:x4770 + small_bags:total_bags + large_bags:total_volume + 
    large_bags:small_bags + type:total_volume + type:x4225 + 
    type:total_bags + year:total_volume + year:x4046 + year:x4770 + 
    year:total_bags + year:large_bags + region:total_volume + 
    region:x4225 + region:total_bags + region:large_bags + month:total_volume + 
    month:x4770 + month:small_bags, data = train)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.221001 -0.032174  0.001345  0.033539  0.192423 

Coefficients: (9 not defined because of singularities)
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      8.165e-03  4.094e-01   0.020 0.984090    
typeorganic                     -2.784e-01  7.165e-02  -3.886 0.000105 ***
year2016                         2.264e-01  2.893e-02   7.825 8.33e-15 ***
year2017                         2.530e-01  2.917e-02   8.673  < 2e-16 ***
year2018                         5.585e-01  4.375e-02  12.765  < 2e-16 ***
regionGreatLakes                -1.878e-01  6.510e-02  -2.884 0.003970 ** 
regionMidsouth                  -3.676e-01  6.743e-02  -5.451 5.67e-08 ***
regionNortheast                 -1.116e-01  6.981e-02  -1.598 0.110148    
regionPlains                    -3.348e-01  7.045e-02  -4.752 2.17e-06 ***
regionSouthCentral              -2.783e-01  6.355e-02  -4.379 1.26e-05 ***
regionSoutheast                 -2.106e-01  6.151e-02  -3.423 0.000632 ***
regionWest                      -1.405e-01  5.894e-02  -2.383 0.017256 *  
month2                           4.356e-02  5.527e-02   0.788 0.430685    
month3                           9.905e-02  5.560e-02   1.781 0.075010 .  
month4                           1.934e-01  5.974e-02   3.237 0.001230 ** 
month5                           1.794e-01  6.119e-02   2.931 0.003416 ** 
month6                           2.517e-01  7.364e-02   3.419 0.000643 ***
month7                           2.664e-01  6.506e-02   4.094 4.41e-05 ***
month8                           3.411e-01  6.150e-02   5.546 3.33e-08 ***
month9                           1.978e-01  6.144e-02   3.219 0.001307 ** 
month10                          2.245e-01  5.838e-02   3.845 0.000124 ***
month11                          8.393e-02  5.889e-02   1.425 0.154253    
month12                         -1.671e-02  5.856e-02  -0.285 0.775332    
total_volume                     3.166e-04  1.371e-04   2.309 0.021043 *  
x4046                           -3.165e-04  1.371e-04  -2.308 0.021079 *  
x4225                           -3.167e-04  1.371e-04  -2.310 0.021003 *  
x4770                           -3.166e-04  1.371e-04  -2.310 0.021017 *  
total_bags                      -3.166e-04  1.371e-04  -2.309 0.021060 *  
small_bags                       7.795e-01  4.104e-01   1.899 0.057662 .  
large_bags                       5.540e-01  4.177e-01   1.326 0.184886    
typeorganic:year2016            -1.797e-01  2.226e-02  -8.070 1.24e-15 ***
typeorganic:year2017            -1.683e-01  2.358e-02  -7.138 1.34e-12 ***
typeorganic:year2018            -1.956e-01  3.689e-02  -5.303 1.27e-07 ***
typeorganic:regionGreatLakes     2.345e-01  6.041e-02   3.883 0.000107 ***
typeorganic:regionMidsouth       4.407e-01  6.377e-02   6.911 6.54e-12 ***
typeorganic:regionNortheast      3.100e-01  6.615e-02   4.687 2.97e-06 ***
typeorganic:regionPlains         4.703e-01  6.813e-02   6.903 6.92e-12 ***
typeorganic:regionSouthCentral   1.715e-01  6.020e-02   2.848 0.004442 ** 
typeorganic:regionSoutheast      3.349e-01  5.767e-02   5.807 7.43e-09 ***
typeorganic:regionWest           3.563e-01  4.709e-02   7.566 5.96e-14 ***
year2016:regionGreatLakes       -9.946e-02  2.185e-02  -4.551 5.67e-06 ***
year2017:regionGreatLakes       -5.205e-02  2.139e-02  -2.433 0.015047 *  
year2018:regionGreatLakes       -1.579e-01  3.763e-02  -4.196 2.85e-05 ***
year2016:regionMidsouth         -4.469e-02  1.759e-02  -2.541 0.011140 *  
year2017:regionMidsouth         -2.629e-03  1.684e-02  -0.156 0.875938    
year2018:regionMidsouth         -1.130e-01  2.918e-02  -3.874 0.000111 ***
year2016:regionNortheast         4.364e-02  1.699e-02   2.568 0.010292 *  
year2017:regionNortheast         9.924e-02  1.700e-02   5.837 6.24e-09 ***
year2018:regionNortheast         6.734e-02  3.189e-02   2.112 0.034848 *  
year2016:regionPlains           -1.492e-01  1.919e-02  -7.775 1.22e-14 ***
year2017:regionPlains           -1.797e-01  1.877e-02  -9.572  < 2e-16 ***
year2018:regionPlains           -3.886e-01  3.472e-02 -11.193  < 2e-16 ***
year2016:regionSouthCentral     -8.497e-02  1.643e-02  -5.172 2.57e-07 ***
year2017:regionSouthCentral     -7.938e-02  1.558e-02  -5.094 3.85e-07 ***
year2018:regionSouthCentral     -1.206e-01  2.916e-02  -4.136 3.68e-05 ***
year2016:regionSoutheast        -1.606e-01  1.823e-02  -8.807  < 2e-16 ***
year2017:regionSoutheast        -1.263e-01  1.784e-02  -7.079 2.04e-12 ***
year2018:regionSoutheast        -3.494e-01  3.109e-02 -11.237  < 2e-16 ***
year2016:regionWest             -1.693e-01  2.585e-02  -6.551 7.36e-11 ***
year2017:regionWest             -7.295e-02  2.652e-02  -2.751 0.005993 ** 
year2018:regionWest             -2.010e-01  4.993e-02  -4.026 5.89e-05 ***
typeorganic:month2              -3.940e-02  2.835e-02  -1.390 0.164707    
typeorganic:month3              -7.479e-02  2.903e-02  -2.576 0.010058 *  
typeorganic:month4              -4.909e-02  3.198e-02  -1.535 0.124877    
typeorganic:month5              -6.119e-02  3.216e-02  -1.903 0.057238 .  
typeorganic:month6              -5.747e-02  3.696e-02  -1.555 0.120154    
typeorganic:month7              -4.697e-02  3.410e-02  -1.377 0.168534    
typeorganic:month8              -4.344e-02  3.278e-02  -1.325 0.185280    
typeorganic:month9               4.129e-02  3.183e-02   1.297 0.194727    
typeorganic:month10             -7.466e-02  3.085e-02  -2.420 0.015603 *  
typeorganic:month11              1.555e-02  3.114e-02   0.499 0.617628    
typeorganic:month12              6.876e-02  3.062e-02   2.246 0.024828 *  
year2016:month2                  1.078e-02  1.656e-02   0.651 0.515194    
year2017:month2                  1.737e-02  1.589e-02   1.093 0.274372    
year2018:month2                 -5.682e-03  1.637e-02  -0.347 0.728638    
year2016:month3                  1.382e-02  1.661e-02   0.832 0.405684    
year2017:month3                  1.186e-01  1.604e-02   7.393 2.14e-13 ***
year2018:month3                  2.810e-03  1.621e-02   0.173 0.862403    
year2016:month4                 -5.759e-02  1.623e-02  -3.547 0.000399 ***
year2017:month4                  1.362e-01  1.554e-02   8.765  < 2e-16 ***
year2018:month4                         NA         NA      NA       NA    
year2016:month5                 -4.515e-02  1.548e-02  -2.917 0.003579 ** 
year2017:month5                  1.960e-01  1.566e-02  12.520  < 2e-16 ***
year2018:month5                         NA         NA      NA       NA    
year2016:month6                 -1.890e-02  1.669e-02  -1.132 0.257598    
year2017:month6                  1.659e-01  1.672e-02   9.922  < 2e-16 ***
year2018:month6                         NA         NA      NA       NA    
year2016:month7                  5.033e-02  1.611e-02   3.125 0.001805 ** 
year2017:month7                  1.556e-01  1.568e-02   9.919  < 2e-16 ***
year2018:month7                         NA         NA      NA       NA    
year2016:month8                  4.055e-02  1.607e-02   2.524 0.011692 *  
year2017:month8                  2.032e-01  1.576e-02  12.894  < 2e-16 ***
year2018:month8                         NA         NA      NA       NA    
year2016:month9                  5.450e-02  1.695e-02   3.215 0.001326 ** 
year2017:month9                  2.789e-01  1.592e-02  17.516  < 2e-16 ***
year2018:month9                         NA         NA      NA       NA    
year2016:month10                 1.130e-01  1.710e-02   6.605 5.13e-11 ***
year2017:month10                 2.422e-01  1.659e-02  14.598  < 2e-16 ***
year2018:month10                        NA         NA      NA       NA    
year2016:month11                 1.736e-01  1.648e-02  10.534  < 2e-16 ***
year2017:month11                 1.820e-01  1.598e-02  11.395  < 2e-16 ***
year2018:month11                        NA         NA      NA       NA    
year2016:month12                 3.590e-02  1.671e-02   2.149 0.031772 *  
year2017:month12                 1.342e-01  1.647e-02   8.145 6.79e-16 ***
year2018:month12                        NA         NA      NA       NA    
regionGreatLakes:month2         -5.116e-02  2.694e-02  -1.899 0.057724 .  
regionMidsouth:month2           -5.640e-02  2.445e-02  -2.306 0.021201 *  
regionNortheast:month2           1.266e-02  2.342e-02   0.540 0.588972    
regionPlains:month2             -6.890e-02  2.673e-02  -2.577 0.010039 *  
regionSouthCentral:month2       -1.542e-02  2.203e-02  -0.700 0.484020    
regionSoutheast:month2          -5.687e-02  2.592e-02  -2.194 0.028341 *  
regionWest:month2               -1.872e-03  2.706e-02  -0.069 0.944864    
regionGreatLakes:month3         -5.322e-02  2.686e-02  -1.981 0.047714 *  
regionMidsouth:month3           -7.974e-02  2.378e-02  -3.353 0.000815 ***
regionNortheast:month3          -4.667e-02  2.269e-02  -2.057 0.039843 *  
regionPlains:month3             -8.713e-02  2.599e-02  -3.352 0.000818 ***
regionSouthCentral:month3       -3.027e-02  2.208e-02  -1.371 0.170585    
regionSoutheast:month3          -8.725e-02  2.493e-02  -3.499 0.000477 ***
regionWest:month3               -6.355e-03  2.644e-02  -0.240 0.810116    
regionGreatLakes:month4         -7.740e-02  2.934e-02  -2.638 0.008396 ** 
regionMidsouth:month4           -7.437e-02  2.648e-02  -2.808 0.005032 ** 
regionNortheast:month4          -2.576e-02  2.511e-02  -1.026 0.305047    
regionPlains:month4             -9.680e-02  2.879e-02  -3.362 0.000788 ***
regionSouthCentral:month4       -3.464e-02  2.391e-02  -1.449 0.147579    
regionSoutheast:month4          -8.844e-02  2.684e-02  -3.295 0.001002 ** 
regionWest:month4               -5.382e-02  2.866e-02  -1.878 0.060526 .  
regionGreatLakes:month5         -5.397e-02  3.033e-02  -1.779 0.075371 .  
regionMidsouth:month5           -7.007e-02  2.705e-02  -2.591 0.009649 ** 
regionNortheast:month5          -1.297e-03  2.491e-02  -0.052 0.958465    
regionPlains:month5             -7.402e-02  2.900e-02  -2.552 0.010787 *  
regionSouthCentral:month5       -1.575e-02  2.442e-02  -0.645 0.518930    
regionSoutheast:month5          -7.682e-02  2.765e-02  -2.779 0.005512 ** 
regionWest:month5                3.702e-03  2.895e-02   0.128 0.898251    
regionGreatLakes:month6         -1.351e-01  3.620e-02  -3.733 0.000195 ***
regionMidsouth:month6           -1.108e-01  3.067e-02  -3.613 0.000310 ***
regionNortheast:month6          -4.064e-02  2.689e-02  -1.511 0.130916    
regionPlains:month6             -1.275e-01  3.184e-02  -4.003 6.48e-05 ***
regionSouthCentral:month6       -7.873e-02  2.599e-02  -3.030 0.002482 ** 
regionSoutheast:month6          -1.451e-01  3.113e-02  -4.662 3.35e-06 ***
regionWest:month6               -4.059e-02  3.423e-02  -1.186 0.235943    
regionGreatLakes:month7         -1.250e-01  3.315e-02  -3.772 0.000167 ***
regionMidsouth:month7           -1.357e-01  2.801e-02  -4.845 1.37e-06 ***
regionNortheast:month7          -1.183e-01  2.582e-02  -4.583 4.87e-06 ***
regionPlains:month7             -1.613e-01  2.958e-02  -5.453 5.60e-08 ***
regionSouthCentral:month7       -9.911e-02  2.399e-02  -4.131 3.77e-05 ***
regionSoutheast:month7          -1.978e-01  2.753e-02  -7.182 9.79e-13 ***
regionWest:month7               -3.284e-02  3.053e-02  -1.076 0.282137    
regionGreatLakes:month8         -1.541e-01  3.085e-02  -4.994 6.44e-07 ***
regionMidsouth:month8           -1.473e-01  2.679e-02  -5.500 4.30e-08 ***
regionNortheast:month8          -2.010e-01  2.694e-02  -7.460 1.30e-13 ***
regionPlains:month8             -1.741e-01  2.998e-02  -5.809 7.36e-09 ***
regionSouthCentral:month8       -8.039e-02  2.503e-02  -3.211 0.001344 ** 
regionSoutheast:month8          -2.004e-01  2.802e-02  -7.154 1.20e-12 ***
regionWest:month8               -7.251e-02  2.955e-02  -2.454 0.014216 *  
regionGreatLakes:month9         -8.688e-02  3.022e-02  -2.875 0.004083 ** 
regionMidsouth:month9           -9.526e-02  2.731e-02  -3.488 0.000498 ***
regionNortheast:month9          -1.451e-01  2.701e-02  -5.374 8.64e-08 ***
regionPlains:month9             -1.061e-01  2.987e-02  -3.550 0.000394 ***
regionSouthCentral:month9       -3.826e-02  2.492e-02  -1.536 0.124780    
regionSoutheast:month9          -1.027e-01  2.789e-02  -3.681 0.000239 ***
regionWest:month9               -6.269e-02  3.036e-02  -2.065 0.039054 *  
regionGreatLakes:month10        -1.015e-01  2.997e-02  -3.388 0.000718 ***
regionMidsouth:month10          -1.333e-01  2.665e-02  -5.003 6.16e-07 ***
regionNortheast:month10         -1.992e-01  2.537e-02  -7.854 6.71e-15 ***
regionPlains:month10            -1.700e-01  2.888e-02  -5.884 4.71e-09 ***
regionSouthCentral:month10      -3.319e-02  2.386e-02  -1.391 0.164348    
regionSoutheast:month10         -1.208e-01  2.772e-02  -4.359 1.38e-05 ***
regionWest:month10              -4.649e-02  2.989e-02  -1.555 0.120058    
regionGreatLakes:month11        -1.109e-01  3.057e-02  -3.627 0.000294 ***
regionMidsouth:month11          -1.030e-01  2.679e-02  -3.846 0.000124 ***
regionNortheast:month11         -1.235e-01  2.561e-02  -4.822 1.53e-06 ***
regionPlains:month11            -9.488e-02  2.963e-02  -3.203 0.001385 ** 
regionSouthCentral:month11      -1.026e-02  2.487e-02  -0.412 0.680117    
regionSoutheast:month11         -1.334e-01  2.761e-02  -4.832 1.46e-06 ***
regionWest:month11              -4.012e-02  3.018e-02  -1.330 0.183808    
regionGreatLakes:month12        -4.482e-02  2.890e-02  -1.551 0.121157    
regionMidsouth:month12          -4.076e-02  2.610e-02  -1.562 0.118516    
regionNortheast:month12          1.524e-04  2.512e-02   0.006 0.995159    
regionPlains:month12            -5.109e-02  2.847e-02  -1.795 0.072890 .  
regionSouthCentral:month12       1.416e-02  2.346e-02   0.603 0.546279    
regionSoutheast:month12         -9.327e-02  2.770e-02  -3.367 0.000775 ***
regionWest:month12              -1.491e-02  3.144e-02  -0.474 0.635416    
total_volume:x4046               9.095e-15  3.453e-15   2.634 0.008516 ** 
total_volume:x4225               1.971e-14  4.402e-15   4.476 8.05e-06 ***
x4225:total_bags                 3.843e-14  1.085e-14   3.542 0.000406 ***
x4770:total_bags                -1.550e-13  6.747e-14  -2.298 0.021694 *  
total_volume:small_bags         -4.496e-04  2.089e-04  -2.153 0.031455 *  
x4046:small_bags                 4.494e-04  2.089e-04   2.152 0.031558 *  
x4225:small_bags                 4.495e-04  2.089e-04   2.152 0.031518 *  
x4770:small_bags                 4.497e-04  2.088e-04   2.153 0.031432 *  
total_bags:small_bags            4.493e-04  2.089e-04   2.151 0.031570 *  
total_volume:large_bags         -2.281e-07  1.031e-07  -2.213 0.027020 *  
small_bags:large_bags            2.682e-01  5.652e-02   4.744 2.25e-06 ***
typeorganic:total_volume        -1.877e-06  1.644e-07 -11.414  < 2e-16 ***
typeorganic:x4225                2.049e-06  2.275e-07   9.007  < 2e-16 ***
typeorganic:total_bags          -5.807e-07  1.743e-07  -3.331 0.000883 ***
year2016:total_volume           -1.197e-07  1.125e-08 -10.640  < 2e-16 ***
year2017:total_volume           -1.098e-07  1.171e-08  -9.375  < 2e-16 ***
year2018:total_volume           -1.407e-07  1.440e-08  -9.770  < 2e-16 ***
year2016:x4046                   8.473e-08  1.103e-08   7.683 2.47e-14 ***
 [ reached getOption("max.print") -- omitted 72 rows ]
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05349 on 1901 degrees of freedom
Multiple R-squared:  0.9642,    Adjusted R-squared:  0.9593 
F-statistic: 195.4 on 262 and 1901 DF,  p-value: < 2.2e-16
# One-row table of fit statistics for lm_multi.
lm_multi %>% glance()
# Training RMSE: square root of the mean squared residual of lm_multi on the
# training rows. Residuals are on the scale of the model's response.
train_rmse <- mean(add_residuals(train, lm_multi)$resid^2)^0.5
Warning: prediction from a rank-deficient fit may be misleading
# Show the training RMSE.
print(train_rmse)
[1] 0.05013213
# Score the held-out test rows with lm_multi: attach fitted values (`pred`)
# and residuals (`resid`), then keep only the columns needed for evaluation.
# NOTE(review): if lm_multi models log(average_price), `pred` is on the log
# scale while `average_price` is not; confirm before comparing them directly.
predictions_test <- add_predictions(test, lm_multi)
predictions_test <- add_residuals(predictions_test, lm_multi)
predictions_test <- dplyr::select(predictions_test, average_price, pred, resid)
Warning: prediction from a rank-deficient fit may be misleadingWarning: prediction from a rank-deficient fit may be misleading
  
# Test RMSE from the residuals stored in predictions_test.
test_rsme <- mean(predictions_test$resid^2)^0.5

# Ratio of test RMSE to train RMSE; a value well above 1 points to overfitting.
test_rsme / train_rmse
[1] 1.265112
# Possible overfitting despite the good BIC: test RMSE is about 27% higher than train RMSE.
# Automated model search with glmulti: log(average_price) is the response and
# every other column of `train` is a candidate predictor.
glmulti_fit <- glmulti(
  log(average_price) ~ .,
  data        = train,
  fitfunction = lm,     # fit each candidate with lm(); glm() is the usual alternative
  crit        = aic,    # rank candidate models by AIC
  level       = 2,      # 2 = allow pairwise interactions; 1 = main effects only
  marginality = TRUE,   # allow an interaction only if both of its main effects are in the model
  minsize     = 0,      # smallest model size to try
  maxsize     = 10,     # largest model size to try (-1 = unlimited)
  method      = "d",    # "d" = trial run that reports the problem size; "h" = exhaustive; "g" = genetic algorithm
  confsetsize = 1000,   # number of best models to return; must be less than the total problem size
  plotty      = FALSE,  # no progress plots
  report      = TRUE    # print progress reports
)
# Restrict the training data to the columns used by the reduced model.
# The subset gets its own name instead of overwriting `train`, because later
# models (e.g. lm12) still need columns such as x4046 that are dropped here.
# select() is namespaced as dplyr::select(), matching the rest of this file,
# so the call does not depend on which attached package owns `select`.
train_multi <- train %>%
  dplyr::select(average_price, region, type, year, month, total_volume, large_bags)

# Reduced log-price model: six main effects plus four pairwise interactions.
lm_multi <- lm(
  log(average_price) ~ 1 + region + type + year + month + total_volume +
    large_bags + year:type + month:year + region:total_volume +
    type:total_volume,
  data = train_multi
)

summary(lm_multi)

Call:
lm(formula = log(average_price) ~ 1 + region + type + year + 
    month + total_volume + large_bags + year:type + month:year + 
    region:total_volume + type:total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.34697 -0.04625  0.00192  0.04766  0.53098 

Coefficients: (9 not defined because of singularities)
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      4.663e-01  1.828e-02  25.506  < 2e-16 ***
regionGreatLakes                -5.460e-02  1.121e-02  -4.871 1.19e-06 ***
regionMidsouth                  -5.180e-02  1.038e-02  -4.993 6.45e-07 ***
regionNortheast                  1.023e-01  9.396e-03  10.884  < 2e-16 ***
regionPlains                    -4.999e-02  1.141e-02  -4.381 1.24e-05 ***
regionSouthCentral              -2.804e-01  9.669e-03 -28.998  < 2e-16 ***
regionSoutheast                 -1.015e-01  1.085e-02  -9.358  < 2e-16 ***
regionWest                       1.086e-01  1.271e-02   8.546  < 2e-16 ***
typeorganic                      1.814e-01  1.465e-02  12.383  < 2e-16 ***
year2016                         4.252e-02  1.509e-02   2.819 0.004869 ** 
year2017                         1.042e-01  1.552e-02   6.716 2.40e-11 ***
year2018                         2.433e-01  1.705e-02  14.265  < 2e-16 ***
month2                           8.632e-03  1.556e-02   0.555 0.579028    
month3                           1.649e-02  1.472e-02   1.120 0.262703    
month4                           6.147e-02  1.530e-02   4.017 6.10e-05 ***
month5                           5.158e-02  1.497e-02   3.446 0.000580 ***
month6                           7.818e-02  1.534e-02   5.095 3.79e-07 ***
month7                           8.055e-02  1.565e-02   5.148 2.88e-07 ***
month8                           8.920e-02  1.428e-02   6.246 5.07e-10 ***
month9                           6.977e-02  1.536e-02   4.543 5.86e-06 ***
month10                          3.473e-02  1.554e-02   2.235 0.025536 *  
month11                         -2.938e-02  1.468e-02  -2.001 0.045483 *  
month12                         -3.794e-02  1.494e-02  -2.538 0.011206 *  
total_volume                    -8.671e-08  2.850e-09 -30.428  < 2e-16 ***
large_bags                      -1.888e-01  1.366e-02 -13.821  < 2e-16 ***
typeorganic:year2016            -8.678e-02  8.663e-03 -10.018  < 2e-16 ***
typeorganic:year2017            -1.263e-01  9.171e-03 -13.767  < 2e-16 ***
typeorganic:year2018            -1.221e-01  1.564e-02  -7.810 8.92e-15 ***
year2016:month2                 -1.740e-02  2.082e-02  -0.836 0.403495    
year2017:month2                 -3.230e-02  2.114e-02  -1.528 0.126740    
year2018:month2                 -2.678e-02  2.166e-02  -1.236 0.216488    
year2016:month3                 -1.764e-02  2.023e-02  -0.872 0.383462    
year2017:month3                  5.821e-02  2.060e-02   2.825 0.004766 ** 
year2018:month3                 -3.319e-02  2.096e-02  -1.583 0.113533    
year2016:month4                 -6.828e-02  2.077e-02  -3.288 0.001025 ** 
year2017:month4                  1.033e-01  2.047e-02   5.046 4.89e-07 ***
year2018:month4                         NA         NA      NA       NA    
year2016:month5                 -3.712e-02  2.012e-02  -1.845 0.065210 .  
year2017:month5                  1.571e-01  2.091e-02   7.512 8.57e-14 ***
year2018:month5                         NA         NA      NA       NA    
year2016:month6                 -2.374e-02  2.067e-02  -1.149 0.250864    
year2017:month6                  1.148e-01  2.102e-02   5.463 5.23e-08 ***
year2018:month6                         NA         NA      NA       NA    
year2016:month7                  4.701e-02  2.073e-02   2.268 0.023445 *  
year2017:month7                  1.328e-01  2.112e-02   6.286 3.95e-10 ***
year2018:month7                         NA         NA      NA       NA    
year2016:month8                  2.643e-02  2.014e-02   1.312 0.189538    
year2017:month8                  1.663e-01  2.044e-02   8.135 6.95e-16 ***
year2018:month8                         NA         NA      NA       NA    
year2016:month9                  6.379e-02  2.077e-02   3.071 0.002159 ** 
year2017:month9                  2.513e-01  2.147e-02  11.706  < 2e-16 ***
year2018:month9                         NA         NA      NA       NA    
year2016:month10                 1.389e-01  2.053e-02   6.766 1.72e-11 ***
year2017:month10                 2.413e-01  2.104e-02  11.464  < 2e-16 ***
year2018:month10                        NA         NA      NA       NA    
year2016:month11                 1.972e-01  2.048e-02   9.626  < 2e-16 ***
year2017:month11                 1.663e-01  2.074e-02   8.017 1.77e-15 ***
year2018:month11                        NA         NA      NA       NA    
year2016:month12                 3.177e-02  2.053e-02   1.547 0.121906    
year2017:month12                 1.024e-01  2.050e-02   4.998 6.29e-07 ***
year2018:month12                        NA         NA      NA       NA    
regionGreatLakes:total_volume   -2.009e-08  3.755e-09  -5.350 9.78e-08 ***
regionMidsouth:total_volume     -3.521e-08  4.501e-09  -7.822 8.14e-15 ***
regionNortheast:total_volume    -9.684e-09  2.902e-09  -3.337 0.000861 ***
regionPlains:total_volume       -1.384e-07  7.841e-09 -17.646  < 2e-16 ***
regionSouthCentral:total_volume  1.061e-08  2.251e-09   4.711 2.62e-06 ***
regionSoutheast:total_volume     1.130e-09  3.422e-09   0.330 0.741157    
regionWest:total_volume         -2.190e-08  2.301e-09  -9.519  < 2e-16 ***
typeorganic:total_volume        -1.213e-06  4.779e-08 -25.372  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07631 on 2104 degrees of freedom
Multiple R-squared:  0.9171,    Adjusted R-squared:  0.9148 
F-statistic: 394.6 on 59 and 2104 DF,  p-value: < 2.2e-16
# Fit statistics for lm_multi, followed by the default lm diagnostic plots.
lm_multi %>% glance()
plot(lm_multi)


# lm12: log-price model with seven main effects and four pairwise interactions
# (type:total_volume, type:region, type:year, region:total_volume).
lm12 <- lm(
  log(average_price) ~ type + region + month + year + x4046 + total_volume +
    large_bags + type:total_volume + type:region + type:year +
    region:total_volume,
  data = train
)
summary(lm12)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags + type:total_volume + type:region + 
    type:year + region:total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.29711 -0.05424 -0.00103  0.05371  0.58560 

Coefficients:
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      5.040e-01  4.200e-02  12.000  < 2e-16 ***
typeorganic                      1.132e-01  4.174e-02   2.712 0.006737 ** 
regionGreatLakes                 4.623e-02  5.474e-02   0.845 0.398458    
regionMidsouth                  -6.778e-02  5.790e-02  -1.171 0.241853    
regionNortheast                 -9.002e-02  5.419e-02  -1.661 0.096845 .  
regionPlains                    -1.949e-02  5.207e-02  -0.374 0.708235    
regionSouthCentral              -1.663e-01  6.121e-02  -2.717 0.006641 ** 
regionSoutheast                 -6.441e-02  5.020e-02  -1.283 0.199627    
regionWest                      -1.099e-01  5.461e-02  -2.013 0.044280 *  
month2                          -5.992e-03  8.212e-03  -0.730 0.465728    
month3                           2.174e-02  8.023e-03   2.710 0.006777 ** 
month4                           7.506e-02  8.803e-03   8.527  < 2e-16 ***
month5                           9.300e-02  8.875e-03  10.479  < 2e-16 ***
month6                           1.089e-01  8.942e-03  12.181  < 2e-16 ***
month7                           1.411e-01  8.794e-03  16.051  < 2e-16 ***
month8                           1.452e-01  8.775e-03  16.547  < 2e-16 ***
month9                           1.671e-01  9.052e-03  18.459  < 2e-16 ***
month10                          1.585e-01  8.883e-03  17.847  < 2e-16 ***
month11                          7.760e-02  9.009e-03   8.614  < 2e-16 ***
month12                          2.495e-03  8.786e-03   0.284 0.776443    
year2016                         9.240e-02  7.159e-03  12.907  < 2e-16 ***
year2017                         2.423e-01  6.958e-03  34.822  < 2e-16 ***
year2018                         3.110e-01  1.221e-02  25.475  < 2e-16 ***
x4046                            4.992e-08  9.840e-09   5.073 4.26e-07 ***
total_volume                    -1.229e-07  7.607e-09 -16.158  < 2e-16 ***
large_bags                      -2.163e-01  1.476e-02 -14.657  < 2e-16 ***
typeorganic:total_volume        -1.338e-06  5.432e-08 -24.634  < 2e-16 ***
typeorganic:regionGreatLakes    -9.084e-02  5.398e-02  -1.683 0.092572 .  
typeorganic:regionMidsouth       1.609e-02  5.713e-02   0.282 0.778277    
typeorganic:regionNortheast      1.904e-01  5.361e-02   3.551 0.000393 ***
typeorganic:regionPlains        -3.563e-02  5.227e-02  -0.682 0.495506    
typeorganic:regionSouthCentral  -1.245e-01  6.087e-02  -2.046 0.040916 *  
typeorganic:regionSoutheast     -4.898e-02  5.038e-02  -0.972 0.330996    
typeorganic:regionWest           2.467e-01  5.423e-02   4.549 5.70e-06 ***
typeorganic:year2016            -9.931e-02  9.931e-03 -10.000  < 2e-16 ***
typeorganic:year2017            -1.291e-01  1.028e-02 -12.552  < 2e-16 ***
typeorganic:year2018            -1.448e-01  1.733e-02  -8.356  < 2e-16 ***
regionGreatLakes:total_volume   -4.966e-08  1.280e-08  -3.880 0.000108 ***
regionMidsouth:total_volume     -3.889e-08  1.574e-08  -2.471 0.013540 *  
regionNortheast:total_volume     4.550e-08  1.119e-08   4.066 4.95e-05 ***
regionPlains:total_volume       -1.954e-07  1.911e-08 -10.229  < 2e-16 ***
regionSouthCentral:total_volume -1.441e-08  1.029e-08  -1.401 0.161390    
regionSoutheast:total_volume    -2.368e-08  1.060e-08  -2.233 0.025684 *  
regionWest:total_volume          1.558e-08  8.890e-09   1.753 0.079786 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.08421 on 2120 degrees of freedom
Multiple R-squared:  0.8983,    Adjusted R-squared:  0.8962 
F-statistic: 435.4 on 43 and 2120 DF,  p-value: < 2.2e-16
# Fit statistics for lm12, followed by the default lm diagnostic plots.
lm12 %>% glance()
plot(lm12)

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGphbml0b3IpDQpsaWJyYXJ5KGNhcmV0KQ0KbGlicmFyeShicm9vbSkNCmxpYnJhcnkoZmFzdER1bW1pZXMpDQpsaWJyYXJ5KEdHYWxseSkNCmxpYnJhcnkoZ2dmb3J0aWZ5KQ0KbGlicmFyeShtb3NhaWMpDQpsaWJyYXJ5KG1vc2FpY0RhdGEpDQpsaWJyYXJ5KG1vZGVscikNCmxpYnJhcnkocmVsYWltcG8pDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoZ2xtdWx0aSkNCmBgYA0KDQpgYGB7cn0NCmF2b2NhZG9zIDwtIHJlYWRfY3N2KGhlcmU6OmhlcmUoIndlZWtlbmQvZGF0YS9hdm9jYWRvLmNzdiIpKSAlPiUgY2xlYW5fbmFtZXMoKQ0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3MgJT4lDQogIGRpc3RpbmN0KHJlZ2lvbikNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIocmVnaW9uID09ICJUb3RhbFVTIiwNCiAgICAgICAgIGRhdGUgPT0gIjIwMTUtMTItMjciKQ0KDQphdm9jYWRvcyAlPiUgDQogIGZpbHRlcihyZWdpb24gJWluJSBjKCJNaWRzb3V0aCIsICJOb3J0aGVhc3QiLCAiUGxhaW5zIiwgIlNvdXRoQ2VudHJhbCIsICJTb3V0aGVhc3QiLCAiV2VzdCIsICJHcmVhdExha2VzIiwgIkNhbGlmb3JuaWEiKSwNCiAgICAgICAgIGRhdGUgPT0gIjIwMTUtMTItMjciKSAlPiUgDQogIGdyb3VwX2J5KHR5cGUpICU+JSANCiAgc3VtbWFyaXNlKHRvdGFsX3ZvbHVtZSA9IHN1bSh0b3RhbF92b2x1bWUpLA0KICAgICAgICAgICAgeDQwNDYgPSBzdW0oeDQwNDYpLA0KICAgICAgICAgICAgdG90YWxfYmFncyA9IHN1bSh0b3RhbF9iYWdzKSkNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIoIXJlZ2lvbiAlaW4lIGMoIlRvdGFsVVMiLCAiTWlkc291dGgiLCAiTm9ydGhlYXN0IiwgIlBsYWlucyIsICJTb3V0aENlbnRyYWwiLCAiU291dGhlYXN0IiwgIldlc3QiLCAiR3JlYXRMYWtlcyIsICJDYWxpZm9ybmlhIiksDQogICAgICAgICBkYXRlID09ICIyMDE1LTEyLTI3IikgJT4lIA0KICBncm91cF9ieSh0eXBlKSAlPiUgDQogIHN1bW1hcmlzZSh0b3RhbF92b2x1bWUgPSBzdW0odG90YWxfdm9sdW1lKSwNCiAgICAgICAgICAgIHg0MDQ2ID0gc3VtKHg0MDQ2KSwNCiAgICAgICAgICAgIHRvdGFsX2JhZ3MgPSBzdW0odG90YWxfYmFncykpDQoNCiMgTWlzc291dGgsIE5vcnRoZWFzdCwgUGxhaW5zLCBTb3V0aENlbnRyYWwsIFNvdXRoZWFzdCwgV2VzdCwgR3JlYXRMYWtlcywgQ2FsaWZvcm5pYQ0KIyBUaGVzZSBhcmUgdGhlIHJlZ2lvbnMgdGhhdCBtYWtlIHVwIHRoZSB0b3RhbCBVUy4gQWJvdmUgY29kZSBzaG93cyB0aGF0IHRoZSB0b3RhbCBudW1iZXJzIG9mIHRoZSBjYXRlZ29yaWVzIGFyZSB0aGUgc2FtZQ0KDQojIEkgd2lsbCBmaWx0ZXIgdGhlIGRhdGEgc28gdGhhdCBpdCBvbmx5IHVzZXMgdGhlIGxpbmVzIGZvciB0aGUgYWJvdmUgcmVnaW9u
cyB0byBhdm9pZCBkb3VibGUgY291bnRpbmcuDQojIG5vIG5lZWQgdG8gd29ycnkgYWJvdXQgdGhlIHNtYWxsZXIgYXJlYXMgYXMgdGhlc2UgYXJlIGNvbnRhaW5lZCB3aXRoaW4gdGhlIGJpZ2dlciByZWdpb25zDQojIGFsc28gaSBkb250IHRoaW5rIGl0IG1ha2VzIHNlbnNlIHRvIGxvb2sgYXQgcmVnaW9ucyB0aGF0IHNtYWxsDQojIGFsc28gYnkgdGhlIGxvb2tzIG9mIGl0IHRoZSByZW1haW5pbmcgY2l0aWVzIGFyZW50IGFuIGV4aGF1c3RpdmUgbGlzdCANCg0KIyBtaWdodCBiZSBnb29kIHRvIGV4dHJhY3QgbW9udGggZnJvbSBkYXRlIGFzIGEgY2F0ZWdvcmljYWwNCiMgcHJvcG9ydGlvbnMgb2YgNDA0NiwgNDIyNSwgNDc3MCBtYXkgYmUgYmV0dGVyIHRoYW4gYWJzb2x1dGVzLCBhbHNvIHByb3BzIG9mIGJhZyBzaXplIG1heSBiZSBiZXR0ZXIgdGhhbiBhYnNvbHV0ZSBudW1iZXJzIA0KYGBgDQpgYGB7cn0NCmF2b2NhZG9zICU+JSANCiAgZmlsdGVyKHJlZ2lvbiAlaW4lIGMoIk1pZHNvdXRoIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJOb3J0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIlBsYWlucyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiU291dGhDZW50cmFsIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJTb3V0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIldlc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkdyZWF0TGFrZXMiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkNhbGlmb3JuaWEiKSkgJT4lIA0KICBtdXRhdGUoYSA9IHJvdW5kKHNtYWxsX2JhZ3MgKyBsYXJnZV9iYWdzICsgeF9sYXJnZV9iYWdzLCAyKSAtIHJvdW5kKHRvdGFsX2JhZ3MsIDIpKSAlPiUgDQogIGFycmFuZ2UoZGVzYyhhKSkNCg0KYXZvY2Fkb3MNCg0KDQoNCmFsaWFzKGxtKGF2ZXJhZ2VfcHJpY2UgfiAuLCBkYXRhID0gYXZvY2Fkb3MpKQ0KDQojIGZvciBzb21lIHJlYXNvbiB0aGUgYmFnIHNpemVzIGRvbnQgcHJvcGVybHkgYWRkIHVwIHRvIHRoZSB0b3RhbF9iYWdzDQojIHdpdGhvdXQgdGhlIHJvdW5kIHRoZXkgYXJlIGRpZmZlcmVudCBieSBsaWtlIDAuMDAwMDAwMDAwMDAxDQojIHdpdGggdGhlIHJvdW5kIHRoZXJlIGFyZSBhIGNvdXBsZSBhdCBlaXRoZXIgZW5kIG91dCBieSArLTEgDQojIHRoaXMgbWVhbnMgdGhleSBkb250IHNob3cgdXAgaW4gYWxpYXMgLSBJJ20gZ29pbmcgdG8gbWFudWFsIHJlbW92ZSB4IGxhcmdlcyBiYWdzIGFzIHRoaXMgY2FuIGJlIGRlcml2ZWQgZnJvbSB0aGUgb3RoZXIgMw0KIyB4NDA0NiwgeDQyMjUgYW5kIHggNDc3MCBkb250IGFkZCB1cCB0byB0aGUgdG90YWwgdm9sdW1lIHNvIGNhbiBrZWVwIHRoZW0gYWxsDQpgYGANCg0KYGBge3J9DQphdm9jYWRvcyA8LSBhdm9jYWRvcyAlPiUgZmlsdGVyKHJlZ2lvbiAlaW4lIGMoIk1pZHNvdXRoIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJOb3J0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAg
IlBsYWlucyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiU291dGhDZW50cmFsIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJTb3V0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIldlc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkdyZWF0TGFrZXMiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkNhbGlmb3JuaWEiKSkgJT4lIA0KICBtdXRhdGUobW9udGggPSBhcy5mYWN0b3IobW9udGgoZGF0ZSkpLA0KICAgICAgICAgYWNyb3NzKHdoZXJlKGlzLmNoYXJhY3RlciksIGFzLmZhY3RvciksDQogICAgICAgICB5ZWFyID0gYXMuZmFjdG9yKHllYXIpKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLXgxLCAtZGF0ZSwgLXhfbGFyZ2VfYmFncykgDQogIA0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3MgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGF2ZXJhZ2VfcHJpY2UsIGlzLmZhY3RvcikgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCiMgbW9udGgsIHR5cGUgYW5kIHJlZ2lvbiBhbGwgcXVpdGUgc3Ryb25nDQoNCmF2b2NhZG9zICU+JSANCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyB0b3RhbCB2b2x1bWUgYW5kIHRvdGFsIGJhZ3Mgc3Ryb25nZXN0DQoNCmF2b2NhZG9zX3Byb3AgPC0gYXZvY2Fkb3MgJT4lIA0KICBtdXRhdGUoYWNyb3NzKHNtYWxsX2JhZ3M6bGFyZ2VfYmFncywgfiAueCAvIHRvdGFsX2JhZ3MpKQ0KDQphdm9jYWRvc19wcm9wICU+JSANCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCg0KI3Byb3BzIGRvbiByZWFsbHkgaGVscCANCg0KDQoNCg0KYGBgDQoNCmBgYHtyfQ0Kbl9kYXRhIDwtIG5yb3coYXZvY2Fkb3NfcHJvcCkNCg0KdGVzdF9pbmRleCA8LSBzYW1wbGUoMTpuX2RhdGEsIHNpemUgPSBuX2RhdGEgKiAwLjIpDQoNCnRlc3QgPC0gYXZvY2Fkb3NfcHJvcCAlPiUgDQogIHNsaWNlKHRlc3RfaW5kZXgpDQoNCnRyYWluIDwtIGF2b2NhZG9zX3Byb3AgJT4lIA0KICBzbGljZSgtdGVzdF9pbmRleCkNCmBgYA0KDQoNCmBgYHtyfQ0KbG0xIDwtIGxtKGF2ZXJhZ2VfcHJpY2UgfiB0b3RhbF92b2x1bWUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtMSkNCnBsb3RNb2RlbChsbTEpDQpwbG90KGxtMSkNCg0KIyBkaWFnbm9zdG9jcyBhcmUgbWFkIGkgdGhpbmsgdGhpcyBpcyBiZWNhdXNlIG9mIHRoZSBvcmdhbmljIHR5cGUNCg0KDQoNCmBgYA0KDQpgYGB7cn0NCmF2b2NhZG9zX3Jlc2lkIDwtIHRyYWluICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbTEpICU+JSANCiAgZHBseXI6OnNlbGVjdCgtYXZlcmFnZV9wcmljZSwgLXRvdGFsX3ZvbHVtZSkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpIA0KDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgaXMuZmFjdG9yKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBz
dHJvbmcNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCmBgYA0KDQpgYGB7cn0NCnRyYWluICU+JSANCiAgZ2dwbG90KGFlcyh4ID0geDQwNDYsIHkgPSBhdmVyYWdlX3ByaWNlLCBjb2xvdXIgPSB0eXBlKSkgKw0KICBnZW9tX3BvaW50KCkgKw0KICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IEZBTFNFKQ0KYGBgDQoNCmBgYHtyfQ0KIyBmaWx0ZXIgYnkgdHlwZSB0byBjaGVjayBpZiB0aGUgYXJlIGNvcnJlbGF0ZWQgd2l0aCBkaWZmZXJlbnQgdGhpbmdzDQoNCmF2b2NhZG9zICU+JSANCiAgZmlsdGVyKHR5cGUgPT0gImNvbnZlbnRpb25hbCIpICU+JSANCiAgZHBseXI6OnNlbGVjdChhdmVyYWdlX3ByaWNlLCBpcy5mYWN0b3IpICU+JSANCiAgZ2dwYWlycygpIA0KDQojIG1vbnRoLCB0eXBlIGFuZCByZWdpb24gYWxsIHF1aXRlIHN0cm9uZw0KDQphdm9jYWRvcyAlPiUgDQogIGZpbHRlcih0eXBlID09ICJjb252ZW50aW9uYWwiKSAlPiUNCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyB0b3RhbCB2b2x1bWUgYW5kIHRvdGFsIGJhZ3Mgc3Ryb25nZXN0DQoNCmF2b2NhZG9zX3Byb3AgJT4lIA0KICBmaWx0ZXIodHlwZSA9PSAiY29udmVudGlvbmFsIikgJT4lDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkNCg0KDQoNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIodHlwZSA9PSAib3JnYW5pYyIpICU+JSANCiAgZHBseXI6OnNlbGVjdChhdmVyYWdlX3ByaWNlLCBpcy5mYWN0b3IpICU+JSANCiAgZ2dwYWlycygpIA0KDQojIG1vbnRoLCB0eXBlIGFuZCByZWdpb24gYWxsIHF1aXRlIHN0cm9uZw0KDQphdm9jYWRvcyAlPiUgDQogIGZpbHRlcih0eXBlID09ICJvcmdhbmljIikgJT4lDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCiMgdG90YWwgdm9sdW1lIGFuZCB0b3RhbCBiYWdzIHN0cm9uZ2VzdA0KDQphdm9jYWRvc19wcm9wICU+JSANCiAgZmlsdGVyKHR5cGUgPT0gIm9yZ2FuaWMiKSAlPiUNCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCg0KDQojcHJvcHMgZG9uIHJlYWxseSBoZWxwIA0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgZ29pbmcgdG8gZG8gdHlwZSBmaXJzdA0KDQpsbTIgPC0gbG0oYXZlcmFnZV9wcmljZSB+IHR5cGUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtMikNCnBsb3QobG0yKQ0KDQoNCmBgYA0KDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfcmVzaWQgPC0gdHJhaW4gJT4lIA0KICBhZGRfcmVzaWR1YWxzKGxtMikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KC1hdmVyYWdlX3ByaWNlLCAtdHlwZSkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpIA0KDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgaXMuZmFjdG9y
KSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBzdHJvbmcNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCg0KDQpgYGB7cn0NCmxtMyA8LSBsbShhdmVyYWdlX3ByaWNlIH4gdHlwZSArIHJlZ2lvbiwgdHJhaW4pDQoNCnN1bW1hcnkobG0zKQ0KcGxvdChsbTMpDQoNCmFub3ZhKGxtMiwgbG0zKQ0KDQpgYGANCg0KDQpgYGB7cn0NCmF2b2NhZG9zX3Jlc2lkIDwtIHRyYWluICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbTMpICU+JSANCiAgZHBseXI6OnNlbGVjdCgtYXZlcmFnZV9wcmljZSwgLXR5cGUsIC1yZWdpb24pICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSANCg0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGlzLmZhY3RvcikgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCiMgbW9udGgsIHR5cGUgYW5kIHJlZ2lvbiBhbGwgcXVpdGUgc3Ryb25nDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCmBgYA0KDQoNCmBgYHtyfQ0KbG00IDwtIGxtKGF2ZXJhZ2VfcHJpY2UgfiB0eXBlICsgcmVnaW9uICsgbW9udGgsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtNCkNCnBsb3QobG00KQ0KDQphbm92YShsbTMsIGxtNCkNCg0KIyBsb2cgaGVscHMgZGlhZ25vc3RpY3MNCg0KbG00IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCwgdHJhaW4pDQoNCnN1bW1hcnkobG00KQ0KcGxvdChsbTQpDQoNCmBgYA0KDQpgYGB7cn0NCmF2b2NhZG9zX3Jlc2lkIDwtIHRyYWluICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbTQpICU+JSANCiAgZHBseXI6OnNlbGVjdCgtYXZlcmFnZV9wcmljZSwgLXR5cGUsIC1yZWdpb24sIC1tb250aCkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpIA0KDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgaXMuZmFjdG9yKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBzdHJvbmcNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCg0KDQpgYGB7cn0NCg0KbG01IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtNSkNCnBsb3QobG01KQ0KDQphbm92YShsbTQsIGxtNSkNCmBgYA0KDQpgYGB7cn0NCmF2b2NhZG9zX3Jlc2lkIDwtIHRyYWluICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbTUpICU+JSANCiAgZHBseXI6OnNlbGVjdCgtYXZlcmFnZV9wcmljZSwgLXR5cGUsIC1yZWdpb24sIC1tb250aCwgLXllYXIpICU+JSAN
CiAgZHBseXI6OnNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSANCg0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGlzLmZhY3RvcikgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCiMgbW9udGgsIHR5cGUgYW5kIHJlZ2lvbiBhbGwgcXVpdGUgc3Ryb25nDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCmBgYA0KDQpgYGB7cn0NCmxtNiA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtNikNCnBsb3QobG02KQ0KDQpgYGANCg0KYGBge3J9DQphdm9jYWRvc19yZXNpZCA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG02KSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWF2ZXJhZ2VfcHJpY2UsIC10eXBlLCAtcmVnaW9uLCAtbW9udGgsIC15ZWFyLCAteDQwNDYpICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSANCg0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQpgYGANCg0KDQpgYGB7cn0NCg0KDQoNCmxtNyA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtNykNCnBsb3QobG03KQ0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfcmVzaWQgPC0gdHJhaW4gJT4lIA0KICBhZGRfcmVzaWR1YWxzKGxtNykgJT4lIA0KICBkcGx5cjo6c2VsZWN0KC1hdmVyYWdlX3ByaWNlLCAtdHlwZSwgLXJlZ2lvbiwgLW1vbnRoLCAteWVhciwgLXg0MDQ2LCAtdG90YWxfdm9sdW1lKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGV2ZXJ5dGhpbmcoKSkgDQoNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCmBgYHtyfQ0KbG04IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIgKyB4NDA0NiArIHRvdGFsX3ZvbHVtZSArIGxhcmdlX2JhZ3MsIHRyYWluKQ0KbG04YiA8LSBsbShsb2coYXZlcmFnZV9wcmljZSl+MSt0eXBlK3llYXIrcmVnaW9uK21vbnRoK3RvdGFsX3ZvbHVtZSt0b3RhbF9iYWdzK3NtYWxsX2JhZ3MrbGFyZ2VfYmFncywgdHJhaW4pDQoNCnN1bW1hcnkobG04KQ0KcGxvdChsbTgpDQpzdW1tYXJ5KGxtOGIpDQpwbG90KGxtOGIpDQpgYGANCg0KYGBge3J9DQphdm9jYWRvc19yZXNpZCA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG04KSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWF2ZXJhZ2VfcHJpY2UsIC10eXBlLCAtcmVnaW9uLCAtbW9udGgsIC15ZWFyLCAteDQwNDYsIC10b3RhbF92b2x1bWUsIC1sYXJnZV9iYWdzKSAlPiUgDQogIGRw
bHlyOjpzZWxlY3QocmVzaWQsIGV2ZXJ5dGhpbmcoKSkgDQoNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCmBgYHtyfQ0KbG05IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIgKyB4NDA0NiArIHRvdGFsX3ZvbHVtZSArIGxhcmdlX2JhZ3MsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtOSkNCnBsb3QobG05KQ0KYGBgDQoNCmBgYHtyfQ0KbG0xMCA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtMTApDQpwbG90KGxtMTApDQpgYGANCg0KDQoNCmBgYHtyfQ0KbG0xMSA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUgKyB0eXBlOnJlZ2lvbiwgdHJhaW4pDQoNCmxtMTIgPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoICsgeWVhciArIHg0MDQ2ICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHR5cGU6dG90YWxfdm9sdW1lICsgdHlwZTpyZWdpb24gKyB0eXBlOnllYXIgKyByZWdpb246dG90YWxfdm9sdW1lLCB0cmFpbikNCg0Kc3VtbWFyeShsbTExKQ0KcGxvdChsbTExKQ0KZ2xhbmNlKGxtMTEpDQpnbGFuY2UobG0xMikNCmBgYA0KDQoNCmBgYHtyfQ0KdHJhaW5fcm1zZSA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG0xMikgJT4lIA0KICBtdXRhdGUoc3FfcmVzaWQgPSByZXNpZF4yKSAlPiUgDQogIHN1bW1hcmlzZShtc2UgPSBtZWFuKHNxX3Jlc2lkKSwNCiAgICAgICAgICAgIHJtc2UgPSBtc2VeMC41KSAlPiUgDQogIHB1bGwocm1zZSkNCg0KDQp0cmFpbl9ybXNlDQoNCnByZWRpY3Rpb25zX3Rlc3QgPC0gdGVzdCAlPiUgDQogIGFkZF9wcmVkaWN0aW9ucyhsbTEyKSAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG0xMikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGF2ZXJhZ2VfcHJpY2UsIHByZWQsIHJlc2lkKSANCiAgDQp0ZXN0X3JzbWUgPC0gcHJlZGljdGlvbnNfdGVzdCAlPiUgDQogIG11dGF0ZShzcV9yZXNpZCA9IHJlc2lkXjIpICU+JSANCiAgc3VtbWFyaXNlKG1zZSA9IG1lYW4oc3FfcmVzaWQpLA0KICAgICAgICAgICAgcm1zZSA9IG1zZV4wLjUpICU+JSANCiAgcHVsbChybXNlKQ0KDQp0ZXN0X3JzbWUgLyB0cmFpbl9ybXNlDQpgYGANCg0KDQpgYGB7cn0NCmN2XzEwX2ZvbGQgPC0gdHJhaW5Db250cm9sKA0KICBtZXRob2QgPSAiY3YiLA0KICBudW1iZXIgPSAxMCwNCiAgc2F2ZVByZWRpY3Rpb25zID0gVFJVRQ0KKQ0KDQptb2RlbDIgPC0gdHJhaW4obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJl
Z2lvbiArIG1vbnRoICsgeWVhciArIHg0MDQ2ICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHR5cGU6dG90YWxfdm9sdW1lLCANCiAgICAgICAgICAgICAgIGF2b2NhZG9zX3Byb3AsIA0KICAgICAgICAgICAgICAgdHJDb250cm9sID0gY3ZfMTBfZm9sZCwgDQogICAgICAgICAgICAgICBtZXRob2QgPSAibG0iKQ0KDQogbW9kZWwyJHJlc2FtcGxlICU+JSANCiAgc3VtbWFyaXNlKGF2X3IyID0gbWVhbihSc3F1YXJlZCksDQogICAgICAgICAgICBhdl9ybXNlID0gbWVhbihSTVNFKSkNCmBgYA0KDQpgYGB7cn0NCmN2XzEwX2ZvbGQgPC0gdHJhaW5Db250cm9sKA0KICBtZXRob2QgPSAiY3YiLA0KICBudW1iZXIgPSAxMCwNCiAgc2F2ZVByZWRpY3Rpb25zID0gVFJVRQ0KKQ0KDQptb2RlbDIgPC0gdHJhaW4obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoICsgeWVhciArIHg0MDQ2KyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUgKyB0eXBlOnJlZ2lvbiArIHR5cGU6eWVhciArIHJlZ2lvbjp0b3RhbF92b2x1bWUsIA0KICAgICAgICAgICAgICAgYXZvY2Fkb3NfcHJvcCwgDQogICAgICAgICAgICAgICB0ckNvbnRyb2wgPSBjdl8xMF9mb2xkLCANCiAgICAgICAgICAgICAgIG1ldGhvZCA9ICJsbSIpDQoNCiBtb2RlbDIkcmVzYW1wbGUgJT4lIA0KICBzdW1tYXJpc2UoYXZfcjIgPSBtZWFuKFJzcXVhcmVkKSwNCiAgICAgICAgICAgIGF2X3Jtc2UgPSBtZWFuKFJNU0UpKQ0KYGBgDQoNCmBgYHtyfQ0KY2FsYy5yZWxpbXAobG05LCB0eXBlID0gImxtZyIsIHJlbGEgPSBUUlVFKQ0KYGBgDQoNCg0KDQoNCkFVVE9NQVRFRA0KDQpgYGB7cn0NCg0KIyMjIyBHQVJCQUdFDQoNCnJlZ3N1YnNldF9mb3J3YXJkcyA8LSByZWdzdWJzZXRzKGF2ZXJhZ2VfcHJpY2UgfiAuLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YSA9IGF2b2NhZG9zX3Byb3AsDQogICAgICAgICAgICAgICAgICAgICAgICAgIG52bWF4ID0gNDAsDQogICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJmb3J3YXJkIikNCg0KDQpzdW1tYXJ5KHJlZ3N1YnNldF9mb3J3YXJkcykNCg0KcGxvdChyZWdzdWJzZXRfZm9yd2FyZHMsDQogICAgIHNjYWxlID0gImJpYyIpDQoNCnN1bV9mb3J3YXJkIDwtIHN1bW1hcnkocmVnc3Vic2V0X2ZvcndhcmRzKQ0KDQpwbG90KHN1bV9mb3J3YXJkJGJpYywNCiAgICAgdHlwZSA9ICJiIikNCmBgYA0KDQpgYGB7cn0NCiMgZ2xtdWx0aQ0KDQoNCmdsbXVsdGlfZml0IDwtIGdsbXVsdGkoDQogIGxvZyhhdmVyYWdlX3ByaWNlKSB+IC4sICMgbW9kZWwgdG8gZml0LCBpbiB0aGlzIGNhc2UsIGNoYXJnZXMgdmFyaWVzIHdpdGggZXZlcnl0aGluZw0KICBsZXZlbCA9IDIsICMgbGV2ZWwgPSAyIG1lYW5zIHRyeSBwYWlyd2lzZSBpbnRlcmFjdGlvbnMuIGxldmVsID0gMSBtZWFucyBtYWluIGVmZmVjdHMgb25seQ0KICBkYXRhID0gdHJhaW4sICMgZGF0YSB0byB1c2Ug
Zm9yIGZpdHRpbmcNCiAgbWluc2l6ZSA9IDAsICMgbWluIHNpemUgb2YgbW9kZWwgdG8gdHJ5LCBpbiBudW1iZXIgb2YgcHJlZGljdG9ycw0KICBtYXhzaXplID0gLTEsICMgbWF4IHNpemUgdG8gdHJ5LCBzZXQgdG8gLTEgZm9yIHVubGltaXRlZA0KICBtYXJnaW5hbGl0eSA9IFRSVUUsICMgbWFyZ2luYWxpdHkgdHJ1ZSBtZWFucyBpbmNsdWRlIHBhaXJ3aXNlIGludGVyYWN0aW9uIG9ubHkgaWYgYm90aCBtYWluIGVmZmVjdHMgcHJlc2VudCBpbiBtb2RlbC4gIA0KICBtZXRob2QgPSAiZCIsICMgbWV0aG9kICJkIiBtZWFucyB0cmlhbCBydW4sIHRvIGdldCBzaXplIG9mIHByb2JsZW0uIFNldCB0byAiaCIgZm9yIGV4aGF1c3RpdmUgc2VhcmNoLCBvciAiZyIgZm9yIGdlbmV0aWMgYWxnb3JpdGhtDQogIGNvbmZzZXRzaXplID0gMTAsICMgaG93IG1hbnkgbW9kZWxzIHNob3VsZCBnbG11bHRpKCkgcmV0dXJuPyBNdXN0IGJlIGxlc3MgdGhhbiB0b3RhbCBzaXplIG9mIHByb2JsZW0NCiAgcGxvdHR5ID0gRkFMU0UsICMgcHJvdmlkZSBwcm9ncmVzcyBwbG90cz8gR2VuZXJhbGx5IGFubm95aW5nLg0KICByZXBvcnQgPSBUUlVFLCAjIHByb3ZpZGUgcHJvZ3Jlc3MgcmVwb3J0cz8gR2VuZXJhbGx5IHVzZWZ1bC4NCiAgZml0ZnVuY3Rpb24gPSBsbSwgIyB1c2UgbG0oKSBhcyBmaXQgZnVuY3Rpb24uIENhbiBhbHNvIHVzZSBnbG0oKSBmb3IgbG9naXN0aWMgcmVncmVzc2lvbi4NCiAgY3JpdCA9IGFpYyAjIGNyaXRlcmlvbiBmb3Igc2VsZWN0aW5nIGJlc3QgbW9kZWxzLiANCikNCmBgYA0KDQoNCmBgYHtyfQ0KbG1fbXVsdGkgPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpfjErdHlwZSt5ZWFyK3JlZ2lvbittb250aCt0b3RhbF92b2x1bWUreDQwNDYreDQyMjUreDQ3NzArdG90YWxfYmFncytzbWFsbF9iYWdzK2xhcmdlX2JhZ3MreWVhcjp0eXBlK3JlZ2lvbjp0eXBlK3JlZ2lvbjp5ZWFyK21vbnRoOnR5cGUrbW9udGg6eWVhcittb250aDpyZWdpb24reDQwNDY6dG90YWxfdm9sdW1lK3g0MjI1OnRvdGFsX3ZvbHVtZSt0b3RhbF9iYWdzOng0MjI1K3RvdGFsX2JhZ3M6eDQ3NzArc21hbGxfYmFnczp0b3RhbF92b2x1bWUrc21hbGxfYmFnczp4NDA0NitzbWFsbF9iYWdzOng0MjI1K3NtYWxsX2JhZ3M6eDQ3NzArc21hbGxfYmFnczp0b3RhbF9iYWdzK2xhcmdlX2JhZ3M6dG90YWxfdm9sdW1lK2xhcmdlX2JhZ3M6c21hbGxfYmFncyt0eXBlOnRvdGFsX3ZvbHVtZSt0eXBlOng0MjI1K3R5cGU6dG90YWxfYmFncyt5ZWFyOnRvdGFsX3ZvbHVtZSt5ZWFyOng0MDQ2K3llYXI6eDQ3NzAreWVhcjp0b3RhbF9iYWdzK3llYXI6bGFyZ2VfYmFncytyZWdpb246dG90YWxfdm9sdW1lK3JlZ2lvbjp4NDIyNStyZWdpb246dG90YWxfYmFncytyZWdpb246bGFyZ2VfYmFncyttb250aDp0b3RhbF92b2x1bWUrbW9udGg6eDQ3NzArbW9udGg6c21hbGxfYmFncywgdHJhaW4pIA0KDQpzdW1tYXJ5KGxtX211bHRpKQ0KZ2xhbmNlKGxtX211bHRpKQ0KYGBgDQoNCmBgYHty
fQ0KdHJhaW5fcm1zZSA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG1fbXVsdGkpICU+JSANCiAgbXV0YXRlKHNxX3Jlc2lkID0gcmVzaWReMikgJT4lIA0KICBzdW1tYXJpc2UobXNlID0gbWVhbihzcV9yZXNpZCksDQogICAgICAgICAgICBybXNlID0gbXNlXjAuNSkgJT4lIA0KICBwdWxsKHJtc2UpDQoNCg0KdHJhaW5fcm1zZQ0KDQpwcmVkaWN0aW9uc190ZXN0IDwtIHRlc3QgJT4lIA0KICBhZGRfcHJlZGljdGlvbnMobG1fbXVsdGkpICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbV9tdWx0aSkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGF2ZXJhZ2VfcHJpY2UsIHByZWQsIHJlc2lkKSANCiAgDQp0ZXN0X3JzbWUgPC0gcHJlZGljdGlvbnNfdGVzdCAlPiUgDQogIG11dGF0ZShzcV9yZXNpZCA9IHJlc2lkXjIpICU+JSANCiAgc3VtbWFyaXNlKG1zZSA9IG1lYW4oc3FfcmVzaWQpLA0KICAgICAgICAgICAgcm1zZSA9IG1zZV4wLjUpICU+JSANCiAgcHVsbChybXNlKQ0KDQp0ZXN0X3JzbWUgLyB0cmFpbl9ybXNlDQoNCiMgb3ZlciBmaXQ/IGRlc3BpdGUgaGF2aW5nIGEgZ29vZCBiaWMNCmBgYA0KYGBge3J9DQpnbG11bHRpX2ZpdCA8LSBnbG11bHRpKA0KICBsb2coYXZlcmFnZV9wcmljZSkgfiAuLCAjIG1vZGVsIHRvIGZpdCwgaW4gdGhpcyBjYXNlLCBjaGFyZ2VzIHZhcmllcyB3aXRoIGV2ZXJ5dGhpbmcNCiAgbGV2ZWwgPSAyLCAjIGxldmVsID0gMiBtZWFucyB0cnkgcGFpcndpc2UgaW50ZXJhY3Rpb25zLiBsZXZlbCA9IDEgbWVhbnMgbWFpbiBlZmZlY3RzIG9ubHkNCiAgZGF0YSA9IHRyYWluLCAjIGRhdGEgdG8gdXNlIGZvciBmaXR0aW5nDQogIG1pbnNpemUgPSAwLCAjIG1pbiBzaXplIG9mIG1vZGVsIHRvIHRyeSwgaW4gbnVtYmVyIG9mIHByZWRpY3RvcnMNCiAgbWF4c2l6ZSA9IDEwLCAjIG1heCBzaXplIHRvIHRyeSwgc2V0IHRvIC0xIGZvciB1bmxpbWl0ZWQNCiAgbWFyZ2luYWxpdHkgPSBUUlVFLCAjIG1hcmdpbmFsaXR5IHRydWUgbWVhbnMgaW5jbHVkZSBwYWlyd2lzZSBpbnRlcmFjdGlvbiBvbmx5IGlmIGJvdGggbWFpbiBlZmZlY3RzIHByZXNlbnQgaW4gbW9kZWwuICANCiAgbWV0aG9kID0gImQiLCAjIG1ldGhvZCAiZCIgbWVhbnMgdHJpYWwgcnVuLCB0byBnZXQgc2l6ZSBvZiBwcm9ibGVtLiBTZXQgdG8gImgiIGZvciBleGhhdXN0aXZlIHNlYXJjaCwgb3IgImciIGZvciBnZW5ldGljIGFsZ29yaXRobQ0KICBjb25mc2V0c2l6ZSA9IDEwMDAsICMgaG93IG1hbnkgbW9kZWxzIHNob3VsZCBnbG11bHRpKCkgcmV0dXJuPyBNdXN0IGJlIGxlc3MgdGhhbiB0b3RhbCBzaXplIG9mIHByb2JsZW0NCiAgcGxvdHR5ID0gRkFMU0UsICMgcHJvdmlkZSBwcm9ncmVzcyBwbG90cz8gR2VuZXJhbGx5IGFubm95aW5nLg0KICByZXBvcnQgPSBUUlVFLCAjIHByb3ZpZGUgcHJvZ3Jlc3MgcmVwb3J0cz8gR2VuZXJhbGx5IHVzZWZ1bC4NCiAgZml0ZnVuY3Rpb24gPSBsbSwgIyB1c2UgbG0oKSBhcyBmaXQgZnVuY3Rpb24uIENhbiBh
bHNvIHVzZSBnbG0oKSBmb3IgbG9naXN0aWMgcmVncmVzc2lvbi4NCiAgY3JpdCA9IGFpYyAjIGNyaXRlcmlvbiBmb3Igc2VsZWN0aW5nIGJlc3QgbW9kZWxzLiANCikNCmBgYA0KDQoNCmBgYHtyfQ0KdHJhaW4gPSB0cmFpbiAlPiUgDQogIHNlbGVjdChhdmVyYWdlX3ByaWNlLCByZWdpb24sIHR5cGUsIHllYXIsIG1vbnRoLCB0b3RhbF92b2x1bWUsIGxhcmdlX2JhZ3MpDQoNCg0KYGBgDQoNCg0KDQoNCg0KYGBge3J9DQpsbV9tdWx0aSA8LSBsbShsb2coYXZlcmFnZV9wcmljZSl+IDEgKyByZWdpb24gKyB0eXBlICsgeWVhciArIG1vbnRoICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHllYXI6dHlwZSArIG1vbnRoOnllYXIgKyByZWdpb246dG90YWxfdm9sdW1lICsgdHlwZTp0b3RhbF92b2x1bWUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtX211bHRpKQ0KZ2xhbmNlKGxtX211bHRpKQ0KcGxvdChsbV9tdWx0aSkNCg0KbG0xMiA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUgKyB0eXBlOnJlZ2lvbiArIHR5cGU6eWVhciArIHJlZ2lvbjp0b3RhbF92b2x1bWUsIHRyYWluKQ0Kc3VtbWFyeShsbTEyKQ0KZ2xhbmNlKGxtMTIpDQpwbG90KGxtMTIpDQpgYGANCg0K